diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,35153 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 15.0, + "global_step": 585540, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.99914608737234e-05, + "loss": 8.1433, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982921747446805e-05, + "loss": 5.3387, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.99743826211702e-05, + "loss": 4.6505, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 4.99658434948936e-05, + "loss": 4.5935, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9957304368617e-05, + "loss": 4.1372, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948765242340405e-05, + "loss": 4.2621, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940226116063806e-05, + "loss": 4.2878, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 4.993168698978721e-05, + "loss": 4.3196, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 4.99231478635106e-05, + "loss": 4.0911, + "step": 900 + }, + { + "epoch": 0.03, + "learning_rate": 4.991460873723401e-05, + "loss": 4.2543, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9906069610957406e-05, + "loss": 4.2923, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 4.989753048468081e-05, + "loss": 4.3948, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 4.988899135840421e-05, + "loss": 4.1853, + "step": 1300 + }, + { + "epoch": 0.04, + "learning_rate": 4.988045223212761e-05, + "loss": 3.8924, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 4.987191310585101e-05, + "loss": 4.4658, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9863373979574414e-05, + "loss": 3.9246, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 4.985483485329781e-05, + "loss": 4.2261, + "step": 1700 + }, + { + "epoch": 0.05, + "learning_rate": 4.984629572702122e-05, + "loss": 4.5428, + "step": 1800 + }, + { + "epoch": 0.05, + "learning_rate": 4.983775660074461e-05, + "loss": 3.9668, + "step": 1900 + }, + { + "epoch": 0.05, + "learning_rate": 4.982921747446801e-05, + "loss": 4.412, + "step": 2000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9820678348191415e-05, + "loss": 4.2224, + "step": 2100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9812139221914816e-05, + "loss": 4.4448, + "step": 2200 + }, + { + "epoch": 0.06, + "learning_rate": 4.980360009563821e-05, + "loss": 4.1606, + "step": 2300 + }, + { + "epoch": 0.06, + "learning_rate": 4.979506096936162e-05, + "loss": 4.335, + "step": 2400 + }, + { + "epoch": 0.06, + "learning_rate": 4.9786521843085014e-05, + "loss": 3.945, + "step": 2500 + }, + { + "epoch": 0.07, + "learning_rate": 4.977798271680842e-05, + "loss": 4.0949, + "step": 2600 + }, + { + "epoch": 0.07, + "learning_rate": 4.976944359053182e-05, + "loss": 4.4423, + "step": 2700 + }, + { + "epoch": 0.07, + "learning_rate": 4.976090446425522e-05, + "loss": 3.9127, + "step": 2800 + }, + { + "epoch": 0.07, + "learning_rate": 4.975236533797862e-05, + "loss": 3.8504, + "step": 2900 + }, + { + "epoch": 0.08, + "learning_rate": 4.974382621170202e-05, + "loss": 4.1752, + "step": 3000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9735287085425417e-05, + "loss": 4.2515, + "step": 3100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9726747959148825e-05, + "loss": 4.1318, + "step": 3200 + }, + { + "epoch": 0.08, + "learning_rate": 4.971820883287222e-05, + "loss": 3.993, + "step": 3300 + }, + { + "epoch": 0.09, + "learning_rate": 4.970966970659562e-05, + "loss": 3.8842, + "step": 3400 + }, + { + "epoch": 0.09, + "learning_rate": 4.970113058031902e-05, + "loss": 4.1074, + "step": 3500 + }, + { + "epoch": 0.09, + "learning_rate": 4.9692591454042424e-05, + "loss": 4.203, + "step": 3600 + }, + { + "epoch": 0.09, + "learning_rate": 4.9684052327765826e-05, + "loss": 3.884, + "step": 3700 + }, + { + "epoch": 0.1, + "learning_rate": 4.967551320148923e-05, + "loss": 3.9207, + "step": 3800 + }, + { + "epoch": 0.1, + "learning_rate": 4.966697407521262e-05, + "loss": 3.9057, + "step": 3900 + }, + { + "epoch": 0.1, + "learning_rate": 4.965843494893603e-05, + "loss": 4.1704, + "step": 4000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9649895822659425e-05, + "loss": 3.9624, + "step": 4100 + }, + { + "epoch": 0.11, + "learning_rate": 4.964135669638283e-05, + "loss": 4.2685, + "step": 4200 + }, + { + "epoch": 0.11, + "learning_rate": 4.963281757010623e-05, + "loss": 4.0752, + "step": 4300 + }, + { + "epoch": 0.11, + "learning_rate": 4.962427844382963e-05, + "loss": 4.2952, + "step": 4400 + }, + { + "epoch": 0.12, + "learning_rate": 4.961573931755303e-05, + "loss": 3.9692, + "step": 4500 + }, + { + "epoch": 0.12, + "learning_rate": 4.960720019127643e-05, + "loss": 4.1518, + "step": 4600 + }, + { + "epoch": 0.12, + "learning_rate": 4.959866106499983e-05, + "loss": 4.0681, + "step": 4700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9590121938723236e-05, + "loss": 4.0123, + "step": 4800 + }, + { + "epoch": 0.13, + "learning_rate": 4.958158281244663e-05, + "loss": 4.1172, + "step": 4900 + }, + { + "epoch": 0.13, + "learning_rate": 4.957304368617003e-05, + "loss": 3.9204, + "step": 5000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9564504559893434e-05, + "loss": 4.104, + "step": 5100 + }, + { + "epoch": 0.13, + "learning_rate": 4.9555965433616835e-05, + "loss": 4.1304, + "step": 5200 + }, + { + "epoch": 0.14, + "learning_rate": 4.954742630734023e-05, + "loss": 4.1386, + "step": 5300 + }, + { + "epoch": 0.14, + "learning_rate": 4.953888718106364e-05, + "loss": 4.0127, + "step": 5400 + }, + { + "epoch": 0.14, + "learning_rate": 4.953034805478703e-05, + "loss": 3.9139, + "step": 5500 + }, + { + "epoch": 0.14, + "learning_rate": 4.9521808928510435e-05, + "loss": 3.9059, + "step": 5600 + }, + { + "epoch": 0.15, + "learning_rate": 4.9513269802233836e-05, + "loss": 3.8485, + "step": 5700 + }, + { + "epoch": 0.15, + "learning_rate": 4.950473067595724e-05, + "loss": 4.0259, + "step": 5800 + }, + { + "epoch": 0.15, + "learning_rate": 4.949619154968064e-05, + "loss": 3.9548, + "step": 5900 + }, + { + "epoch": 0.15, + "learning_rate": 4.948765242340404e-05, + "loss": 4.1649, + "step": 6000 + }, + { + "epoch": 0.16, + "learning_rate": 4.9479113297127436e-05, + "loss": 3.9737, + "step": 6100 + }, + { + "epoch": 0.16, + "learning_rate": 4.9470574170850844e-05, + "loss": 4.2423, + "step": 6200 + }, + { + "epoch": 0.16, + "learning_rate": 4.946203504457424e-05, + "loss": 4.0339, + "step": 6300 + }, + { + "epoch": 0.16, + "learning_rate": 4.945349591829764e-05, + "loss": 4.0146, + "step": 6400 + }, + { + "epoch": 0.17, + "learning_rate": 4.944495679202104e-05, + "loss": 4.097, + "step": 6500 + }, + { + "epoch": 0.17, + "learning_rate": 4.9436417665744444e-05, + "loss": 3.8055, + "step": 6600 + }, + { + "epoch": 0.17, + "learning_rate": 4.9427878539467845e-05, + "loss": 4.1866, + "step": 6700 + }, + { + "epoch": 0.17, + "learning_rate": 4.941933941319125e-05, + "loss": 4.2445, + "step": 6800 + }, + { + "epoch": 0.18, + "learning_rate": 4.941080028691464e-05, + "loss": 3.9666, + "step": 6900 + }, + { + "epoch": 0.18, + "learning_rate": 4.940226116063805e-05, + "loss": 3.9317, + "step": 7000 + }, + { + "epoch": 0.18, + "learning_rate": 4.9393722034361445e-05, + "loss": 3.6663, + "step": 7100 + }, + { + "epoch": 0.18, + "learning_rate": 4.9385182908084846e-05, + "loss": 3.7571, + "step": 7200 + }, + { + "epoch": 0.19, + "learning_rate": 4.937664378180825e-05, + "loss": 3.772, + "step": 7300 + }, + { + "epoch": 0.19, + "learning_rate": 4.936810465553165e-05, + "loss": 3.5315, + "step": 7400 + }, + { + "epoch": 0.19, + "learning_rate": 4.935956552925505e-05, + "loss": 3.9508, + "step": 7500 + }, + { + "epoch": 0.19, + "learning_rate": 4.935102640297845e-05, + "loss": 4.0053, + "step": 7600 + }, + { + "epoch": 0.2, + "learning_rate": 4.934248727670185e-05, + "loss": 3.8292, + "step": 7700 + }, + { + "epoch": 0.2, + "learning_rate": 4.9333948150425255e-05, + "loss": 3.9688, + "step": 7800 + }, + { + "epoch": 0.2, + "learning_rate": 4.932540902414865e-05, + "loss": 3.8298, + "step": 7900 + }, + { + "epoch": 0.2, + "learning_rate": 4.931686989787205e-05, + "loss": 3.8508, + "step": 8000 + }, + { + "epoch": 0.21, + "learning_rate": 4.930833077159545e-05, + "loss": 3.6922, + "step": 8100 + }, + { + "epoch": 0.21, + "learning_rate": 4.9299791645318855e-05, + "loss": 3.8841, + "step": 8200 + }, + { + "epoch": 0.21, + "learning_rate": 4.929125251904225e-05, + "loss": 3.7595, + "step": 8300 + }, + { + "epoch": 0.22, + "learning_rate": 4.928271339276566e-05, + "loss": 3.6968, + "step": 8400 + }, + { + "epoch": 0.22, + "learning_rate": 4.927417426648905e-05, + "loss": 3.7239, + "step": 8500 + }, + { + "epoch": 0.22, + "learning_rate": 4.9265635140212454e-05, + "loss": 3.7007, + "step": 8600 + }, + { + "epoch": 0.22, + "learning_rate": 4.9257096013935856e-05, + "loss": 3.762, + "step": 8700 + }, + { + "epoch": 0.23, + "learning_rate": 4.924855688765926e-05, + "loss": 3.912, + "step": 8800 + }, + { + "epoch": 0.23, + "learning_rate": 4.924001776138266e-05, + "loss": 3.8828, + "step": 8900 + }, + { + "epoch": 0.23, + "learning_rate": 4.923147863510606e-05, + "loss": 3.7351, + "step": 9000 + }, + { + "epoch": 0.23, + "learning_rate": 4.9222939508829455e-05, + "loss": 3.7754, + "step": 9100 + }, + { + "epoch": 0.24, + "learning_rate": 4.9214400382552864e-05, + "loss": 4.0024, + "step": 9200 + }, + { + "epoch": 0.24, + "learning_rate": 4.920586125627626e-05, + "loss": 3.946, + "step": 9300 + }, + { + "epoch": 0.24, + "learning_rate": 4.919732212999966e-05, + "loss": 3.7708, + "step": 9400 + }, + { + "epoch": 0.24, + "learning_rate": 4.918878300372306e-05, + "loss": 3.7317, + "step": 9500 + }, + { + "epoch": 0.25, + "learning_rate": 4.918024387744646e-05, + "loss": 3.9509, + "step": 9600 + }, + { + "epoch": 0.25, + "learning_rate": 4.9171704751169864e-05, + "loss": 3.5076, + "step": 9700 + }, + { + "epoch": 0.25, + "learning_rate": 4.9163165624893266e-05, + "loss": 3.9101, + "step": 9800 + }, + { + "epoch": 0.25, + "learning_rate": 4.915462649861666e-05, + "loss": 3.9417, + "step": 9900 + }, + { + "epoch": 0.26, + "learning_rate": 4.914608737234007e-05, + "loss": 3.9119, + "step": 10000 + }, + { + "epoch": 0.26, + "learning_rate": 4.9137548246063464e-05, + "loss": 3.9257, + "step": 10100 + }, + { + "epoch": 0.26, + "learning_rate": 4.9129009119786865e-05, + "loss": 4.0386, + "step": 10200 + }, + { + "epoch": 0.26, + "learning_rate": 4.912046999351027e-05, + "loss": 3.942, + "step": 10300 + }, + { + "epoch": 0.27, + "learning_rate": 4.911193086723367e-05, + "loss": 3.7424, + "step": 10400 + }, + { + "epoch": 0.27, + "learning_rate": 4.910339174095706e-05, + "loss": 3.5835, + "step": 10500 + }, + { + "epoch": 0.27, + "learning_rate": 4.909485261468047e-05, + "loss": 3.9946, + "step": 10600 + }, + { + "epoch": 0.27, + "learning_rate": 4.9086313488403866e-05, + "loss": 3.9583, + "step": 10700 + }, + { + "epoch": 0.28, + "learning_rate": 4.907777436212727e-05, + "loss": 3.6759, + "step": 10800 + }, + { + "epoch": 0.28, + "learning_rate": 4.906923523585067e-05, + "loss": 3.8039, + "step": 10900 + }, + { + "epoch": 0.28, + "learning_rate": 4.906069610957407e-05, + "loss": 3.8028, + "step": 11000 + }, + { + "epoch": 0.28, + "learning_rate": 4.905215698329747e-05, + "loss": 3.6921, + "step": 11100 + }, + { + "epoch": 0.29, + "learning_rate": 4.9043617857020874e-05, + "loss": 3.5227, + "step": 11200 + }, + { + "epoch": 0.29, + "learning_rate": 4.903507873074427e-05, + "loss": 3.6822, + "step": 11300 + }, + { + "epoch": 0.29, + "learning_rate": 4.902653960446768e-05, + "loss": 3.5773, + "step": 11400 + }, + { + "epoch": 0.29, + "learning_rate": 4.901800047819107e-05, + "loss": 3.7838, + "step": 11500 + }, + { + "epoch": 0.3, + "learning_rate": 4.9009461351914474e-05, + "loss": 3.7591, + "step": 11600 + }, + { + "epoch": 0.3, + "learning_rate": 4.9000922225637875e-05, + "loss": 3.8082, + "step": 11700 + }, + { + "epoch": 0.3, + "learning_rate": 4.899238309936128e-05, + "loss": 3.7714, + "step": 11800 + }, + { + "epoch": 0.3, + "learning_rate": 4.898384397308468e-05, + "loss": 3.7589, + "step": 11900 + }, + { + "epoch": 0.31, + "learning_rate": 4.897530484680808e-05, + "loss": 3.8622, + "step": 12000 + }, + { + "epoch": 0.31, + "learning_rate": 4.8966765720531475e-05, + "loss": 3.7458, + "step": 12100 + }, + { + "epoch": 0.31, + "learning_rate": 4.895822659425488e-05, + "loss": 3.5666, + "step": 12200 + }, + { + "epoch": 0.32, + "learning_rate": 4.894968746797828e-05, + "loss": 3.773, + "step": 12300 + }, + { + "epoch": 0.32, + "learning_rate": 4.894114834170168e-05, + "loss": 3.792, + "step": 12400 + }, + { + "epoch": 0.32, + "learning_rate": 4.893260921542508e-05, + "loss": 3.8636, + "step": 12500 + }, + { + "epoch": 0.32, + "learning_rate": 4.892407008914848e-05, + "loss": 3.9016, + "step": 12600 + }, + { + "epoch": 0.33, + "learning_rate": 4.8915530962871884e-05, + "loss": 3.5966, + "step": 12700 + }, + { + "epoch": 0.33, + "learning_rate": 4.8906991836595285e-05, + "loss": 3.7381, + "step": 12800 + }, + { + "epoch": 0.33, + "learning_rate": 4.889845271031868e-05, + "loss": 3.8202, + "step": 12900 + }, + { + "epoch": 0.33, + "learning_rate": 4.888991358404209e-05, + "loss": 3.7938, + "step": 13000 + }, + { + "epoch": 0.34, + "learning_rate": 4.888137445776548e-05, + "loss": 3.668, + "step": 13100 + }, + { + "epoch": 0.34, + "learning_rate": 4.8872835331488885e-05, + "loss": 3.598, + "step": 13200 + }, + { + "epoch": 0.34, + "learning_rate": 4.8864296205212286e-05, + "loss": 3.8545, + "step": 13300 + }, + { + "epoch": 0.34, + "learning_rate": 4.885575707893569e-05, + "loss": 3.7405, + "step": 13400 + }, + { + "epoch": 0.35, + "learning_rate": 4.884721795265908e-05, + "loss": 3.7744, + "step": 13500 + }, + { + "epoch": 0.35, + "learning_rate": 4.883867882638249e-05, + "loss": 3.7296, + "step": 13600 + }, + { + "epoch": 0.35, + "learning_rate": 4.8830139700105886e-05, + "loss": 3.7762, + "step": 13700 + }, + { + "epoch": 0.35, + "learning_rate": 4.882160057382929e-05, + "loss": 3.6065, + "step": 13800 + }, + { + "epoch": 0.36, + "learning_rate": 4.881306144755269e-05, + "loss": 3.8279, + "step": 13900 + }, + { + "epoch": 0.36, + "learning_rate": 4.880452232127609e-05, + "loss": 3.7685, + "step": 14000 + }, + { + "epoch": 0.36, + "learning_rate": 4.879598319499949e-05, + "loss": 3.7034, + "step": 14100 + }, + { + "epoch": 0.36, + "learning_rate": 4.8787444068722893e-05, + "loss": 3.963, + "step": 14200 + }, + { + "epoch": 0.37, + "learning_rate": 4.877890494244629e-05, + "loss": 3.6013, + "step": 14300 + }, + { + "epoch": 0.37, + "learning_rate": 4.8770365816169697e-05, + "loss": 3.6733, + "step": 14400 + }, + { + "epoch": 0.37, + "learning_rate": 4.876182668989309e-05, + "loss": 3.4549, + "step": 14500 + }, + { + "epoch": 0.37, + "learning_rate": 4.875328756361649e-05, + "loss": 3.5551, + "step": 14600 + }, + { + "epoch": 0.38, + "learning_rate": 4.8744748437339894e-05, + "loss": 3.6137, + "step": 14700 + }, + { + "epoch": 0.38, + "learning_rate": 4.8736209311063296e-05, + "loss": 3.9688, + "step": 14800 + }, + { + "epoch": 0.38, + "learning_rate": 4.87276701847867e-05, + "loss": 3.7047, + "step": 14900 + }, + { + "epoch": 0.38, + "learning_rate": 4.87191310585101e-05, + "loss": 3.5083, + "step": 15000 + }, + { + "epoch": 0.39, + "learning_rate": 4.8710591932233494e-05, + "loss": 3.7918, + "step": 15100 + }, + { + "epoch": 0.39, + "learning_rate": 4.87020528059569e-05, + "loss": 3.7162, + "step": 15200 + }, + { + "epoch": 0.39, + "learning_rate": 4.86935136796803e-05, + "loss": 3.6648, + "step": 15300 + }, + { + "epoch": 0.39, + "learning_rate": 4.86849745534037e-05, + "loss": 3.5669, + "step": 15400 + }, + { + "epoch": 0.4, + "learning_rate": 4.86764354271271e-05, + "loss": 3.659, + "step": 15500 + }, + { + "epoch": 0.4, + "learning_rate": 4.86678963008505e-05, + "loss": 3.9466, + "step": 15600 + }, + { + "epoch": 0.4, + "learning_rate": 4.8659357174573896e-05, + "loss": 3.4854, + "step": 15700 + }, + { + "epoch": 0.4, + "learning_rate": 4.8650818048297305e-05, + "loss": 3.7612, + "step": 15800 + }, + { + "epoch": 0.41, + "learning_rate": 4.86422789220207e-05, + "loss": 3.7588, + "step": 15900 + }, + { + "epoch": 0.41, + "learning_rate": 4.86337397957441e-05, + "loss": 3.4317, + "step": 16000 + }, + { + "epoch": 0.41, + "learning_rate": 4.86252006694675e-05, + "loss": 3.7622, + "step": 16100 + }, + { + "epoch": 0.42, + "learning_rate": 4.8616661543190904e-05, + "loss": 3.5345, + "step": 16200 + }, + { + "epoch": 0.42, + "learning_rate": 4.8608122416914306e-05, + "loss": 3.7285, + "step": 16300 + }, + { + "epoch": 0.42, + "learning_rate": 4.859958329063771e-05, + "loss": 3.5887, + "step": 16400 + }, + { + "epoch": 0.42, + "learning_rate": 4.85910441643611e-05, + "loss": 3.4669, + "step": 16500 + }, + { + "epoch": 0.43, + "learning_rate": 4.858250503808451e-05, + "loss": 3.535, + "step": 16600 + }, + { + "epoch": 0.43, + "learning_rate": 4.8573965911807905e-05, + "loss": 3.6845, + "step": 16700 + }, + { + "epoch": 0.43, + "learning_rate": 4.856542678553131e-05, + "loss": 3.6075, + "step": 16800 + }, + { + "epoch": 0.43, + "learning_rate": 4.855688765925471e-05, + "loss": 3.7537, + "step": 16900 + }, + { + "epoch": 0.44, + "learning_rate": 4.854834853297811e-05, + "loss": 3.6431, + "step": 17000 + }, + { + "epoch": 0.44, + "learning_rate": 4.853980940670151e-05, + "loss": 3.4635, + "step": 17100 + }, + { + "epoch": 0.44, + "learning_rate": 4.853127028042491e-05, + "loss": 3.5606, + "step": 17200 + }, + { + "epoch": 0.44, + "learning_rate": 4.852273115414831e-05, + "loss": 3.9068, + "step": 17300 + }, + { + "epoch": 0.45, + "learning_rate": 4.8514192027871716e-05, + "loss": 3.6972, + "step": 17400 + }, + { + "epoch": 0.45, + "learning_rate": 4.850565290159511e-05, + "loss": 3.6389, + "step": 17500 + }, + { + "epoch": 0.45, + "learning_rate": 4.849711377531851e-05, + "loss": 3.6478, + "step": 17600 + }, + { + "epoch": 0.45, + "learning_rate": 4.8488574649041914e-05, + "loss": 3.8012, + "step": 17700 + }, + { + "epoch": 0.46, + "learning_rate": 4.848003552276531e-05, + "loss": 3.5439, + "step": 17800 + }, + { + "epoch": 0.46, + "learning_rate": 4.847149639648872e-05, + "loss": 3.44, + "step": 17900 + }, + { + "epoch": 0.46, + "learning_rate": 4.846295727021211e-05, + "loss": 3.6266, + "step": 18000 + }, + { + "epoch": 0.46, + "learning_rate": 4.845441814393551e-05, + "loss": 3.2828, + "step": 18100 + }, + { + "epoch": 0.47, + "learning_rate": 4.8445879017658915e-05, + "loss": 3.2803, + "step": 18200 + }, + { + "epoch": 0.47, + "learning_rate": 4.8437339891382316e-05, + "loss": 3.7721, + "step": 18300 + }, + { + "epoch": 0.47, + "learning_rate": 4.842880076510571e-05, + "loss": 3.7414, + "step": 18400 + }, + { + "epoch": 0.47, + "learning_rate": 4.842026163882912e-05, + "loss": 3.6759, + "step": 18500 + }, + { + "epoch": 0.48, + "learning_rate": 4.8411722512552514e-05, + "loss": 3.6668, + "step": 18600 + }, + { + "epoch": 0.48, + "learning_rate": 4.8403183386275916e-05, + "loss": 3.6371, + "step": 18700 + }, + { + "epoch": 0.48, + "learning_rate": 4.839464425999932e-05, + "loss": 3.3712, + "step": 18800 + }, + { + "epoch": 0.48, + "learning_rate": 4.838610513372272e-05, + "loss": 3.5995, + "step": 18900 + }, + { + "epoch": 0.49, + "learning_rate": 4.837756600744612e-05, + "loss": 3.6567, + "step": 19000 + }, + { + "epoch": 0.49, + "learning_rate": 4.836902688116952e-05, + "loss": 3.626, + "step": 19100 + }, + { + "epoch": 0.49, + "learning_rate": 4.836048775489292e-05, + "loss": 3.7768, + "step": 19200 + }, + { + "epoch": 0.49, + "learning_rate": 4.8351948628616325e-05, + "loss": 3.2899, + "step": 19300 + }, + { + "epoch": 0.5, + "learning_rate": 4.834340950233972e-05, + "loss": 3.6196, + "step": 19400 + }, + { + "epoch": 0.5, + "learning_rate": 4.833487037606312e-05, + "loss": 3.4995, + "step": 19500 + }, + { + "epoch": 0.5, + "learning_rate": 4.832633124978652e-05, + "loss": 3.5778, + "step": 19600 + }, + { + "epoch": 0.5, + "learning_rate": 4.8317792123509924e-05, + "loss": 3.6503, + "step": 19700 + }, + { + "epoch": 0.51, + "learning_rate": 4.8309252997233326e-05, + "loss": 3.5208, + "step": 19800 + }, + { + "epoch": 0.51, + "learning_rate": 4.830071387095673e-05, + "loss": 3.5284, + "step": 19900 + }, + { + "epoch": 0.51, + "learning_rate": 4.829217474468012e-05, + "loss": 3.4265, + "step": 20000 + }, + { + "epoch": 0.51, + "learning_rate": 4.828363561840353e-05, + "loss": 3.597, + "step": 20100 + }, + { + "epoch": 0.52, + "learning_rate": 4.8275096492126925e-05, + "loss": 3.7741, + "step": 20200 + }, + { + "epoch": 0.52, + "learning_rate": 4.826655736585033e-05, + "loss": 3.8414, + "step": 20300 + }, + { + "epoch": 0.52, + "learning_rate": 4.825801823957373e-05, + "loss": 3.5326, + "step": 20400 + }, + { + "epoch": 0.53, + "learning_rate": 4.824947911329713e-05, + "loss": 3.7442, + "step": 20500 + }, + { + "epoch": 0.53, + "learning_rate": 4.8240939987020525e-05, + "loss": 3.5901, + "step": 20600 + }, + { + "epoch": 0.53, + "learning_rate": 4.823240086074393e-05, + "loss": 3.8139, + "step": 20700 + }, + { + "epoch": 0.53, + "learning_rate": 4.822386173446733e-05, + "loss": 3.5049, + "step": 20800 + }, + { + "epoch": 0.54, + "learning_rate": 4.821532260819073e-05, + "loss": 3.4308, + "step": 20900 + }, + { + "epoch": 0.54, + "learning_rate": 4.820678348191413e-05, + "loss": 3.5181, + "step": 21000 + }, + { + "epoch": 0.54, + "learning_rate": 4.819824435563753e-05, + "loss": 3.6863, + "step": 21100 + }, + { + "epoch": 0.54, + "learning_rate": 4.8189705229360934e-05, + "loss": 3.4282, + "step": 21200 + }, + { + "epoch": 0.55, + "learning_rate": 4.8181166103084336e-05, + "loss": 3.4732, + "step": 21300 + }, + { + "epoch": 0.55, + "learning_rate": 4.817262697680773e-05, + "loss": 3.6055, + "step": 21400 + }, + { + "epoch": 0.55, + "learning_rate": 4.816408785053114e-05, + "loss": 3.4187, + "step": 21500 + }, + { + "epoch": 0.55, + "learning_rate": 4.8155548724254534e-05, + "loss": 3.4109, + "step": 21600 + }, + { + "epoch": 0.56, + "learning_rate": 4.8147009597977935e-05, + "loss": 3.5652, + "step": 21700 + }, + { + "epoch": 0.56, + "learning_rate": 4.813847047170134e-05, + "loss": 3.5527, + "step": 21800 + }, + { + "epoch": 0.56, + "learning_rate": 4.812993134542474e-05, + "loss": 3.5741, + "step": 21900 + }, + { + "epoch": 0.56, + "learning_rate": 4.812139221914814e-05, + "loss": 3.6138, + "step": 22000 + }, + { + "epoch": 0.57, + "learning_rate": 4.811285309287154e-05, + "loss": 3.5562, + "step": 22100 + }, + { + "epoch": 0.57, + "learning_rate": 4.8104313966594936e-05, + "loss": 3.5844, + "step": 22200 + }, + { + "epoch": 0.57, + "learning_rate": 4.8095774840318344e-05, + "loss": 3.4481, + "step": 22300 + }, + { + "epoch": 0.57, + "learning_rate": 4.808723571404174e-05, + "loss": 3.547, + "step": 22400 + }, + { + "epoch": 0.58, + "learning_rate": 4.807869658776514e-05, + "loss": 3.5046, + "step": 22500 + }, + { + "epoch": 0.58, + "learning_rate": 4.807015746148854e-05, + "loss": 3.5041, + "step": 22600 + }, + { + "epoch": 0.58, + "learning_rate": 4.8061618335211944e-05, + "loss": 3.5439, + "step": 22700 + }, + { + "epoch": 0.58, + "learning_rate": 4.8053079208935345e-05, + "loss": 3.5554, + "step": 22800 + }, + { + "epoch": 0.59, + "learning_rate": 4.804454008265875e-05, + "loss": 3.517, + "step": 22900 + }, + { + "epoch": 0.59, + "learning_rate": 4.803600095638214e-05, + "loss": 3.5763, + "step": 23000 + }, + { + "epoch": 0.59, + "learning_rate": 4.802746183010555e-05, + "loss": 3.5727, + "step": 23100 + }, + { + "epoch": 0.59, + "learning_rate": 4.8018922703828945e-05, + "loss": 3.6164, + "step": 23200 + }, + { + "epoch": 0.6, + "learning_rate": 4.8010383577552346e-05, + "loss": 3.5565, + "step": 23300 + }, + { + "epoch": 0.6, + "learning_rate": 4.800184445127575e-05, + "loss": 3.2922, + "step": 23400 + }, + { + "epoch": 0.6, + "learning_rate": 4.799330532499915e-05, + "loss": 3.4609, + "step": 23500 + }, + { + "epoch": 0.6, + "learning_rate": 4.7984766198722544e-05, + "loss": 3.6773, + "step": 23600 + }, + { + "epoch": 0.61, + "learning_rate": 4.797622707244595e-05, + "loss": 3.4789, + "step": 23700 + }, + { + "epoch": 0.61, + "learning_rate": 4.796768794616935e-05, + "loss": 3.9325, + "step": 23800 + }, + { + "epoch": 0.61, + "learning_rate": 4.795914881989275e-05, + "loss": 3.3577, + "step": 23900 + }, + { + "epoch": 0.61, + "learning_rate": 4.795060969361615e-05, + "loss": 3.4149, + "step": 24000 + }, + { + "epoch": 0.62, + "learning_rate": 4.794207056733955e-05, + "loss": 3.6172, + "step": 24100 + }, + { + "epoch": 0.62, + "learning_rate": 4.7933531441062953e-05, + "loss": 3.4819, + "step": 24200 + }, + { + "epoch": 0.62, + "learning_rate": 4.7924992314786355e-05, + "loss": 3.4195, + "step": 24300 + }, + { + "epoch": 0.63, + "learning_rate": 4.791645318850975e-05, + "loss": 3.3149, + "step": 24400 + }, + { + "epoch": 0.63, + "learning_rate": 4.790791406223316e-05, + "loss": 3.3554, + "step": 24500 + }, + { + "epoch": 0.63, + "learning_rate": 4.789937493595655e-05, + "loss": 3.5358, + "step": 24600 + }, + { + "epoch": 0.63, + "learning_rate": 4.7890835809679954e-05, + "loss": 3.4567, + "step": 24700 + }, + { + "epoch": 0.64, + "learning_rate": 4.7882296683403356e-05, + "loss": 3.5939, + "step": 24800 + }, + { + "epoch": 0.64, + "learning_rate": 4.787375755712676e-05, + "loss": 3.3348, + "step": 24900 + }, + { + "epoch": 0.64, + "learning_rate": 4.786521843085016e-05, + "loss": 3.5004, + "step": 25000 + }, + { + "epoch": 0.64, + "learning_rate": 4.785667930457356e-05, + "loss": 3.5305, + "step": 25100 + }, + { + "epoch": 0.65, + "learning_rate": 4.7848140178296955e-05, + "loss": 3.3193, + "step": 25200 + }, + { + "epoch": 0.65, + "learning_rate": 4.7839601052020364e-05, + "loss": 3.4184, + "step": 25300 + }, + { + "epoch": 0.65, + "learning_rate": 4.783106192574376e-05, + "loss": 3.4644, + "step": 25400 + }, + { + "epoch": 0.65, + "learning_rate": 4.782252279946716e-05, + "loss": 3.5732, + "step": 25500 + }, + { + "epoch": 0.66, + "learning_rate": 4.781398367319056e-05, + "loss": 3.2855, + "step": 25600 + }, + { + "epoch": 0.66, + "learning_rate": 4.780544454691396e-05, + "loss": 3.464, + "step": 25700 + }, + { + "epoch": 0.66, + "learning_rate": 4.779690542063736e-05, + "loss": 3.5029, + "step": 25800 + }, + { + "epoch": 0.66, + "learning_rate": 4.7788366294360766e-05, + "loss": 3.4364, + "step": 25900 + }, + { + "epoch": 0.67, + "learning_rate": 4.777982716808416e-05, + "loss": 3.3555, + "step": 26000 + }, + { + "epoch": 0.67, + "learning_rate": 4.777128804180756e-05, + "loss": 3.6529, + "step": 26100 + }, + { + "epoch": 0.67, + "learning_rate": 4.7762748915530964e-05, + "loss": 3.537, + "step": 26200 + }, + { + "epoch": 0.67, + "learning_rate": 4.7754209789254366e-05, + "loss": 3.4883, + "step": 26300 + }, + { + "epoch": 0.68, + "learning_rate": 4.774567066297777e-05, + "loss": 3.6947, + "step": 26400 + }, + { + "epoch": 0.68, + "learning_rate": 4.773713153670117e-05, + "loss": 3.457, + "step": 26500 + }, + { + "epoch": 0.68, + "learning_rate": 4.7728592410424563e-05, + "loss": 3.4468, + "step": 26600 + }, + { + "epoch": 0.68, + "learning_rate": 4.772005328414797e-05, + "loss": 3.5284, + "step": 26700 + }, + { + "epoch": 0.69, + "learning_rate": 4.7711514157871367e-05, + "loss": 3.221, + "step": 26800 + }, + { + "epoch": 0.69, + "learning_rate": 4.770297503159477e-05, + "loss": 3.4848, + "step": 26900 + }, + { + "epoch": 0.69, + "learning_rate": 4.769443590531817e-05, + "loss": 3.4874, + "step": 27000 + }, + { + "epoch": 0.69, + "learning_rate": 4.768589677904157e-05, + "loss": 3.435, + "step": 27100 + }, + { + "epoch": 0.7, + "learning_rate": 4.767735765276497e-05, + "loss": 3.5241, + "step": 27200 + }, + { + "epoch": 0.7, + "learning_rate": 4.7668818526488374e-05, + "loss": 3.6542, + "step": 27300 + }, + { + "epoch": 0.7, + "learning_rate": 4.766027940021177e-05, + "loss": 3.3834, + "step": 27400 + }, + { + "epoch": 0.7, + "learning_rate": 4.765174027393518e-05, + "loss": 3.6834, + "step": 27500 + }, + { + "epoch": 0.71, + "learning_rate": 4.764320114765857e-05, + "loss": 3.2529, + "step": 27600 + }, + { + "epoch": 0.71, + "learning_rate": 4.7634662021381974e-05, + "loss": 3.3623, + "step": 27700 + }, + { + "epoch": 0.71, + "learning_rate": 4.7626122895105375e-05, + "loss": 3.4693, + "step": 27800 + }, + { + "epoch": 0.71, + "learning_rate": 4.761758376882878e-05, + "loss": 3.6151, + "step": 27900 + }, + { + "epoch": 0.72, + "learning_rate": 4.760904464255218e-05, + "loss": 3.4634, + "step": 28000 + }, + { + "epoch": 0.72, + "learning_rate": 4.760050551627558e-05, + "loss": 3.2379, + "step": 28100 + }, + { + "epoch": 0.72, + "learning_rate": 4.7591966389998975e-05, + "loss": 3.5468, + "step": 28200 + }, + { + "epoch": 0.72, + "learning_rate": 4.758342726372238e-05, + "loss": 3.3572, + "step": 28300 + }, + { + "epoch": 0.73, + "learning_rate": 4.757488813744578e-05, + "loss": 3.4741, + "step": 28400 + }, + { + "epoch": 0.73, + "learning_rate": 4.756634901116918e-05, + "loss": 3.5941, + "step": 28500 + }, + { + "epoch": 0.73, + "learning_rate": 4.755780988489258e-05, + "loss": 3.2845, + "step": 28600 + }, + { + "epoch": 0.74, + "learning_rate": 4.754927075861598e-05, + "loss": 3.3763, + "step": 28700 + }, + { + "epoch": 0.74, + "learning_rate": 4.754073163233938e-05, + "loss": 3.4421, + "step": 28800 + }, + { + "epoch": 0.74, + "learning_rate": 4.7532192506062786e-05, + "loss": 3.3501, + "step": 28900 + }, + { + "epoch": 0.74, + "learning_rate": 4.752365337978618e-05, + "loss": 3.4032, + "step": 29000 + }, + { + "epoch": 0.75, + "learning_rate": 4.751511425350958e-05, + "loss": 3.1186, + "step": 29100 + }, + { + "epoch": 0.75, + "learning_rate": 4.7506575127232983e-05, + "loss": 3.5095, + "step": 29200 + }, + { + "epoch": 0.75, + "learning_rate": 4.7498036000956385e-05, + "loss": 3.1795, + "step": 29300 + }, + { + "epoch": 0.75, + "learning_rate": 4.7489496874679787e-05, + "loss": 3.5747, + "step": 29400 + }, + { + "epoch": 0.76, + "learning_rate": 4.748095774840319e-05, + "loss": 3.6188, + "step": 29500 + }, + { + "epoch": 0.76, + "learning_rate": 4.747241862212658e-05, + "loss": 3.3566, + "step": 29600 + }, + { + "epoch": 0.76, + "learning_rate": 4.746387949584999e-05, + "loss": 3.547, + "step": 29700 + }, + { + "epoch": 0.76, + "learning_rate": 4.7455340369573386e-05, + "loss": 3.1518, + "step": 29800 + }, + { + "epoch": 0.77, + "learning_rate": 4.744680124329679e-05, + "loss": 3.466, + "step": 29900 + }, + { + "epoch": 0.77, + "learning_rate": 4.743826211702019e-05, + "loss": 3.2771, + "step": 30000 + }, + { + "epoch": 0.77, + "learning_rate": 4.742972299074359e-05, + "loss": 3.5729, + "step": 30100 + }, + { + "epoch": 0.77, + "learning_rate": 4.742118386446699e-05, + "loss": 3.4516, + "step": 30200 + }, + { + "epoch": 0.78, + "learning_rate": 4.7412644738190394e-05, + "loss": 3.3993, + "step": 30300 + }, + { + "epoch": 0.78, + "learning_rate": 4.740410561191379e-05, + "loss": 3.4406, + "step": 30400 + }, + { + "epoch": 0.78, + "learning_rate": 4.73955664856372e-05, + "loss": 3.5511, + "step": 30500 + }, + { + "epoch": 0.78, + "learning_rate": 4.738702735936059e-05, + "loss": 3.2725, + "step": 30600 + }, + { + "epoch": 0.79, + "learning_rate": 4.737848823308399e-05, + "loss": 3.4044, + "step": 30700 + }, + { + "epoch": 0.79, + "learning_rate": 4.7369949106807395e-05, + "loss": 3.6242, + "step": 30800 + }, + { + "epoch": 0.79, + "learning_rate": 4.7361409980530796e-05, + "loss": 3.6762, + "step": 30900 + }, + { + "epoch": 0.79, + "learning_rate": 4.735287085425419e-05, + "loss": 3.4278, + "step": 31000 + }, + { + "epoch": 0.8, + "learning_rate": 4.73443317279776e-05, + "loss": 3.3284, + "step": 31100 + }, + { + "epoch": 0.8, + "learning_rate": 4.7335792601700994e-05, + "loss": 3.5034, + "step": 31200 + }, + { + "epoch": 0.8, + "learning_rate": 4.73272534754244e-05, + "loss": 3.4058, + "step": 31300 + }, + { + "epoch": 0.8, + "learning_rate": 4.73187143491478e-05, + "loss": 3.3646, + "step": 31400 + }, + { + "epoch": 0.81, + "learning_rate": 4.73101752228712e-05, + "loss": 3.4112, + "step": 31500 + }, + { + "epoch": 0.81, + "learning_rate": 4.73016360965946e-05, + "loss": 3.475, + "step": 31600 + }, + { + "epoch": 0.81, + "learning_rate": 4.7293096970318e-05, + "loss": 3.6318, + "step": 31700 + }, + { + "epoch": 0.81, + "learning_rate": 4.7284557844041397e-05, + "loss": 3.2754, + "step": 31800 + }, + { + "epoch": 0.82, + "learning_rate": 4.7276018717764805e-05, + "loss": 3.3146, + "step": 31900 + }, + { + "epoch": 0.82, + "learning_rate": 4.72674795914882e-05, + "loss": 3.37, + "step": 32000 + }, + { + "epoch": 0.82, + "learning_rate": 4.72589404652116e-05, + "loss": 3.4869, + "step": 32100 + }, + { + "epoch": 0.82, + "learning_rate": 4.7250401338935e-05, + "loss": 3.4892, + "step": 32200 + }, + { + "epoch": 0.83, + "learning_rate": 4.7241862212658404e-05, + "loss": 3.217, + "step": 32300 + }, + { + "epoch": 0.83, + "learning_rate": 4.7233323086381806e-05, + "loss": 3.4166, + "step": 32400 + }, + { + "epoch": 0.83, + "learning_rate": 4.722478396010521e-05, + "loss": 3.4837, + "step": 32500 + }, + { + "epoch": 0.84, + "learning_rate": 4.72162448338286e-05, + "loss": 3.399, + "step": 32600 + }, + { + "epoch": 0.84, + "learning_rate": 4.720770570755201e-05, + "loss": 3.6814, + "step": 32700 + }, + { + "epoch": 0.84, + "learning_rate": 4.7199166581275405e-05, + "loss": 3.5863, + "step": 32800 + }, + { + "epoch": 0.84, + "learning_rate": 4.719062745499881e-05, + "loss": 3.3289, + "step": 32900 + }, + { + "epoch": 0.85, + "learning_rate": 4.718208832872221e-05, + "loss": 3.2249, + "step": 33000 + }, + { + "epoch": 0.85, + "learning_rate": 4.717354920244561e-05, + "loss": 3.6567, + "step": 33100 + }, + { + "epoch": 0.85, + "learning_rate": 4.716501007616901e-05, + "loss": 3.471, + "step": 33200 + }, + { + "epoch": 0.85, + "learning_rate": 4.715647094989241e-05, + "loss": 3.2949, + "step": 33300 + }, + { + "epoch": 0.86, + "learning_rate": 4.714793182361581e-05, + "loss": 3.3271, + "step": 33400 + }, + { + "epoch": 0.86, + "learning_rate": 4.7139392697339216e-05, + "loss": 3.2935, + "step": 33500 + }, + { + "epoch": 0.86, + "learning_rate": 4.713085357106261e-05, + "loss": 3.4567, + "step": 33600 + }, + { + "epoch": 0.86, + "learning_rate": 4.712231444478601e-05, + "loss": 3.5018, + "step": 33700 + }, + { + "epoch": 0.87, + "learning_rate": 4.7113775318509414e-05, + "loss": 3.462, + "step": 33800 + }, + { + "epoch": 0.87, + "learning_rate": 4.7105236192232816e-05, + "loss": 3.4016, + "step": 33900 + }, + { + "epoch": 0.87, + "learning_rate": 4.709669706595621e-05, + "loss": 3.3442, + "step": 34000 + }, + { + "epoch": 0.87, + "learning_rate": 4.708815793967962e-05, + "loss": 3.4873, + "step": 34100 + }, + { + "epoch": 0.88, + "learning_rate": 4.7079618813403013e-05, + "loss": 3.3928, + "step": 34200 + }, + { + "epoch": 0.88, + "learning_rate": 4.7071079687126415e-05, + "loss": 3.6974, + "step": 34300 + }, + { + "epoch": 0.88, + "learning_rate": 4.7062540560849816e-05, + "loss": 3.316, + "step": 34400 + }, + { + "epoch": 0.88, + "learning_rate": 4.705400143457322e-05, + "loss": 3.3236, + "step": 34500 + }, + { + "epoch": 0.89, + "learning_rate": 4.704546230829662e-05, + "loss": 3.53, + "step": 34600 + }, + { + "epoch": 0.89, + "learning_rate": 4.703692318202002e-05, + "loss": 3.5239, + "step": 34700 + }, + { + "epoch": 0.89, + "learning_rate": 4.7028384055743416e-05, + "loss": 3.1612, + "step": 34800 + }, + { + "epoch": 0.89, + "learning_rate": 4.7019844929466824e-05, + "loss": 3.3604, + "step": 34900 + }, + { + "epoch": 0.9, + "learning_rate": 4.701130580319022e-05, + "loss": 3.259, + "step": 35000 + }, + { + "epoch": 0.9, + "learning_rate": 4.700276667691362e-05, + "loss": 3.3522, + "step": 35100 + }, + { + "epoch": 0.9, + "learning_rate": 4.699422755063702e-05, + "loss": 3.3189, + "step": 35200 + }, + { + "epoch": 0.9, + "learning_rate": 4.698568842436042e-05, + "loss": 3.457, + "step": 35300 + }, + { + "epoch": 0.91, + "learning_rate": 4.6977149298083825e-05, + "loss": 3.3757, + "step": 35400 + }, + { + "epoch": 0.91, + "learning_rate": 4.696861017180722e-05, + "loss": 3.3143, + "step": 35500 + }, + { + "epoch": 0.91, + "learning_rate": 4.696007104553062e-05, + "loss": 3.329, + "step": 35600 + }, + { + "epoch": 0.91, + "learning_rate": 4.695153191925402e-05, + "loss": 3.4637, + "step": 35700 + }, + { + "epoch": 0.92, + "learning_rate": 4.6942992792977425e-05, + "loss": 3.3483, + "step": 35800 + }, + { + "epoch": 0.92, + "learning_rate": 4.693445366670082e-05, + "loss": 3.4662, + "step": 35900 + }, + { + "epoch": 0.92, + "learning_rate": 4.692591454042423e-05, + "loss": 3.2127, + "step": 36000 + }, + { + "epoch": 0.92, + "learning_rate": 4.691737541414762e-05, + "loss": 3.2416, + "step": 36100 + }, + { + "epoch": 0.93, + "learning_rate": 4.690883628787103e-05, + "loss": 3.1665, + "step": 36200 + }, + { + "epoch": 0.93, + "learning_rate": 4.6900297161594426e-05, + "loss": 3.371, + "step": 36300 + }, + { + "epoch": 0.93, + "learning_rate": 4.689175803531783e-05, + "loss": 3.2884, + "step": 36400 + }, + { + "epoch": 0.94, + "learning_rate": 4.688321890904123e-05, + "loss": 3.3204, + "step": 36500 + }, + { + "epoch": 0.94, + "learning_rate": 4.687467978276463e-05, + "loss": 3.2915, + "step": 36600 + }, + { + "epoch": 0.94, + "learning_rate": 4.6866140656488025e-05, + "loss": 3.2848, + "step": 36700 + }, + { + "epoch": 0.94, + "learning_rate": 4.685760153021143e-05, + "loss": 3.2604, + "step": 36800 + }, + { + "epoch": 0.95, + "learning_rate": 4.684906240393483e-05, + "loss": 3.1758, + "step": 36900 + }, + { + "epoch": 0.95, + "learning_rate": 4.684052327765823e-05, + "loss": 3.3302, + "step": 37000 + }, + { + "epoch": 0.95, + "learning_rate": 4.683198415138163e-05, + "loss": 3.5299, + "step": 37100 + }, + { + "epoch": 0.95, + "learning_rate": 4.682344502510503e-05, + "loss": 3.3508, + "step": 37200 + }, + { + "epoch": 0.96, + "learning_rate": 4.6814905898828434e-05, + "loss": 3.3845, + "step": 37300 + }, + { + "epoch": 0.96, + "learning_rate": 4.6806366772551836e-05, + "loss": 3.4054, + "step": 37400 + }, + { + "epoch": 0.96, + "learning_rate": 4.679782764627523e-05, + "loss": 3.4561, + "step": 37500 + }, + { + "epoch": 0.96, + "learning_rate": 4.678928851999864e-05, + "loss": 3.4069, + "step": 37600 + }, + { + "epoch": 0.97, + "learning_rate": 4.6780749393722034e-05, + "loss": 3.2533, + "step": 37700 + }, + { + "epoch": 0.97, + "learning_rate": 4.6772210267445435e-05, + "loss": 3.4234, + "step": 37800 + }, + { + "epoch": 0.97, + "learning_rate": 4.676367114116884e-05, + "loss": 3.1182, + "step": 37900 + }, + { + "epoch": 0.97, + "learning_rate": 4.675513201489224e-05, + "loss": 3.2555, + "step": 38000 + }, + { + "epoch": 0.98, + "learning_rate": 4.674659288861564e-05, + "loss": 3.5934, + "step": 38100 + }, + { + "epoch": 0.98, + "learning_rate": 4.673805376233904e-05, + "loss": 3.3511, + "step": 38200 + }, + { + "epoch": 0.98, + "learning_rate": 4.6729514636062436e-05, + "loss": 3.3294, + "step": 38300 + }, + { + "epoch": 0.98, + "learning_rate": 4.6720975509785845e-05, + "loss": 3.2316, + "step": 38400 + }, + { + "epoch": 0.99, + "learning_rate": 4.671243638350924e-05, + "loss": 3.2372, + "step": 38500 + }, + { + "epoch": 0.99, + "learning_rate": 4.670389725723264e-05, + "loss": 3.43, + "step": 38600 + }, + { + "epoch": 0.99, + "learning_rate": 4.669535813095604e-05, + "loss": 3.2801, + "step": 38700 + }, + { + "epoch": 0.99, + "learning_rate": 4.6686819004679444e-05, + "loss": 3.4245, + "step": 38800 + }, + { + "epoch": 1.0, + "learning_rate": 4.667827987840284e-05, + "loss": 3.3985, + "step": 38900 + }, + { + "epoch": 1.0, + "learning_rate": 4.666974075212625e-05, + "loss": 3.2582, + "step": 39000 + }, + { + "epoch": 1.0, + "learning_rate": 4.666120162584964e-05, + "loss": 3.445, + "step": 39100 + }, + { + "epoch": 1.0, + "learning_rate": 4.665266249957304e-05, + "loss": 3.3439, + "step": 39200 + }, + { + "epoch": 1.01, + "learning_rate": 4.6644123373296445e-05, + "loss": 3.3569, + "step": 39300 + }, + { + "epoch": 1.01, + "learning_rate": 4.6635584247019846e-05, + "loss": 3.2744, + "step": 39400 + }, + { + "epoch": 1.01, + "learning_rate": 4.662704512074325e-05, + "loss": 3.4693, + "step": 39500 + }, + { + "epoch": 1.01, + "learning_rate": 4.661850599446665e-05, + "loss": 3.2883, + "step": 39600 + }, + { + "epoch": 1.02, + "learning_rate": 4.6609966868190044e-05, + "loss": 3.3045, + "step": 39700 + }, + { + "epoch": 1.02, + "learning_rate": 4.660142774191345e-05, + "loss": 3.4343, + "step": 39800 + }, + { + "epoch": 1.02, + "learning_rate": 4.659288861563685e-05, + "loss": 3.2372, + "step": 39900 + }, + { + "epoch": 1.02, + "learning_rate": 4.658434948936025e-05, + "loss": 3.4615, + "step": 40000 + }, + { + "epoch": 1.03, + "learning_rate": 4.657581036308365e-05, + "loss": 3.4103, + "step": 40100 + }, + { + "epoch": 1.03, + "learning_rate": 4.656727123680705e-05, + "loss": 3.5328, + "step": 40200 + }, + { + "epoch": 1.03, + "learning_rate": 4.6558732110530454e-05, + "loss": 3.4379, + "step": 40300 + }, + { + "epoch": 1.03, + "learning_rate": 4.6550192984253855e-05, + "loss": 3.3792, + "step": 40400 + }, + { + "epoch": 1.04, + "learning_rate": 4.654165385797725e-05, + "loss": 3.5508, + "step": 40500 + }, + { + "epoch": 1.04, + "learning_rate": 4.653311473170066e-05, + "loss": 3.3502, + "step": 40600 + }, + { + "epoch": 1.04, + "learning_rate": 4.652457560542405e-05, + "loss": 3.4316, + "step": 40700 + }, + { + "epoch": 1.05, + "learning_rate": 4.6516036479147455e-05, + "loss": 3.1812, + "step": 40800 + }, + { + "epoch": 1.05, + "learning_rate": 4.6507497352870856e-05, + "loss": 3.348, + "step": 40900 + }, + { + "epoch": 1.05, + "learning_rate": 4.649895822659426e-05, + "loss": 3.3069, + "step": 41000 + }, + { + "epoch": 1.05, + "learning_rate": 4.649041910031766e-05, + "loss": 3.2655, + "step": 41100 + }, + { + "epoch": 1.06, + "learning_rate": 4.648187997404106e-05, + "loss": 3.1583, + "step": 41200 + }, + { + "epoch": 1.06, + "learning_rate": 4.6473340847764456e-05, + "loss": 3.3218, + "step": 41300 + }, + { + "epoch": 1.06, + "learning_rate": 4.6464801721487864e-05, + "loss": 3.4447, + "step": 41400 + }, + { + "epoch": 1.06, + "learning_rate": 4.645626259521126e-05, + "loss": 3.2127, + "step": 41500 + }, + { + "epoch": 1.07, + "learning_rate": 4.644772346893466e-05, + "loss": 3.1546, + "step": 41600 + }, + { + "epoch": 1.07, + "learning_rate": 4.643918434265806e-05, + "loss": 3.5458, + "step": 41700 + }, + { + "epoch": 1.07, + "learning_rate": 4.643064521638146e-05, + "loss": 3.3776, + "step": 41800 + }, + { + "epoch": 1.07, + "learning_rate": 4.642210609010486e-05, + "loss": 3.3053, + "step": 41900 + }, + { + "epoch": 1.08, + "learning_rate": 4.6413566963828266e-05, + "loss": 3.4147, + "step": 42000 + }, + { + "epoch": 1.08, + "learning_rate": 4.640502783755166e-05, + "loss": 3.308, + "step": 42100 + }, + { + "epoch": 1.08, + "learning_rate": 4.639648871127506e-05, + "loss": 3.1021, + "step": 42200 + }, + { + "epoch": 1.08, + "learning_rate": 4.6387949584998464e-05, + "loss": 3.4419, + "step": 42300 + }, + { + "epoch": 1.09, + "learning_rate": 4.6379410458721866e-05, + "loss": 3.1076, + "step": 42400 + }, + { + "epoch": 1.09, + "learning_rate": 4.637087133244527e-05, + "loss": 3.2782, + "step": 42500 + }, + { + "epoch": 1.09, + "learning_rate": 4.636233220616867e-05, + "loss": 3.43, + "step": 42600 + }, + { + "epoch": 1.09, + "learning_rate": 4.6353793079892064e-05, + "loss": 3.4279, + "step": 42700 + }, + { + "epoch": 1.1, + "learning_rate": 4.634525395361547e-05, + "loss": 3.2666, + "step": 42800 + }, + { + "epoch": 1.1, + "learning_rate": 4.633671482733887e-05, + "loss": 3.3548, + "step": 42900 + }, + { + "epoch": 1.1, + "learning_rate": 4.632817570106227e-05, + "loss": 3.378, + "step": 43000 + }, + { + "epoch": 1.1, + "learning_rate": 4.631963657478567e-05, + "loss": 3.2853, + "step": 43100 + }, + { + "epoch": 1.11, + "learning_rate": 4.631109744850907e-05, + "loss": 3.3193, + "step": 43200 + }, + { + "epoch": 1.11, + "learning_rate": 4.630255832223247e-05, + "loss": 3.4065, + "step": 43300 + }, + { + "epoch": 1.11, + "learning_rate": 4.6294019195955875e-05, + "loss": 3.2967, + "step": 43400 + }, + { + "epoch": 1.11, + "learning_rate": 4.628548006967927e-05, + "loss": 3.3564, + "step": 43500 + }, + { + "epoch": 1.12, + "learning_rate": 4.627694094340268e-05, + "loss": 3.4165, + "step": 43600 + }, + { + "epoch": 1.12, + "learning_rate": 4.626840181712607e-05, + "loss": 3.2119, + "step": 43700 + }, + { + "epoch": 1.12, + "learning_rate": 4.6259862690849474e-05, + "loss": 3.2635, + "step": 43800 + }, + { + "epoch": 1.12, + "learning_rate": 4.6251323564572875e-05, + "loss": 3.2283, + "step": 43900 + }, + { + "epoch": 1.13, + "learning_rate": 4.624278443829628e-05, + "loss": 3.2392, + "step": 44000 + }, + { + "epoch": 1.13, + "learning_rate": 4.623424531201967e-05, + "loss": 3.274, + "step": 44100 + }, + { + "epoch": 1.13, + "learning_rate": 4.622570618574308e-05, + "loss": 3.1144, + "step": 44200 + }, + { + "epoch": 1.13, + "learning_rate": 4.6217167059466475e-05, + "loss": 3.2448, + "step": 44300 + }, + { + "epoch": 1.14, + "learning_rate": 4.6208627933189876e-05, + "loss": 3.4071, + "step": 44400 + }, + { + "epoch": 1.14, + "learning_rate": 4.620008880691328e-05, + "loss": 3.2883, + "step": 44500 + }, + { + "epoch": 1.14, + "learning_rate": 4.619154968063668e-05, + "loss": 3.3453, + "step": 44600 + }, + { + "epoch": 1.15, + "learning_rate": 4.618301055436008e-05, + "loss": 3.237, + "step": 44700 + }, + { + "epoch": 1.15, + "learning_rate": 4.617447142808348e-05, + "loss": 3.4076, + "step": 44800 + }, + { + "epoch": 1.15, + "learning_rate": 4.616593230180688e-05, + "loss": 3.2343, + "step": 44900 + }, + { + "epoch": 1.15, + "learning_rate": 4.6157393175530286e-05, + "loss": 3.0996, + "step": 45000 + }, + { + "epoch": 1.16, + "learning_rate": 4.614885404925368e-05, + "loss": 3.2876, + "step": 45100 + }, + { + "epoch": 1.16, + "learning_rate": 4.614031492297708e-05, + "loss": 3.3522, + "step": 45200 + }, + { + "epoch": 1.16, + "learning_rate": 4.6131775796700484e-05, + "loss": 3.274, + "step": 45300 + }, + { + "epoch": 1.16, + "learning_rate": 4.6123236670423885e-05, + "loss": 3.168, + "step": 45400 + }, + { + "epoch": 1.17, + "learning_rate": 4.611469754414729e-05, + "loss": 3.3319, + "step": 45500 + }, + { + "epoch": 1.17, + "learning_rate": 4.610615841787069e-05, + "loss": 3.2365, + "step": 45600 + }, + { + "epoch": 1.17, + "learning_rate": 4.609761929159408e-05, + "loss": 3.208, + "step": 45700 + }, + { + "epoch": 1.17, + "learning_rate": 4.608908016531749e-05, + "loss": 3.0745, + "step": 45800 + }, + { + "epoch": 1.18, + "learning_rate": 4.6080541039040886e-05, + "loss": 3.1878, + "step": 45900 + }, + { + "epoch": 1.18, + "learning_rate": 4.607200191276429e-05, + "loss": 3.3813, + "step": 46000 + }, + { + "epoch": 1.18, + "learning_rate": 4.606346278648769e-05, + "loss": 3.2259, + "step": 46100 + }, + { + "epoch": 1.18, + "learning_rate": 4.605492366021109e-05, + "loss": 3.355, + "step": 46200 + }, + { + "epoch": 1.19, + "learning_rate": 4.604638453393449e-05, + "loss": 3.3909, + "step": 46300 + }, + { + "epoch": 1.19, + "learning_rate": 4.6037845407657894e-05, + "loss": 3.2687, + "step": 46400 + }, + { + "epoch": 1.19, + "learning_rate": 4.602930628138129e-05, + "loss": 3.2326, + "step": 46500 + }, + { + "epoch": 1.19, + "learning_rate": 4.60207671551047e-05, + "loss": 3.3135, + "step": 46600 + }, + { + "epoch": 1.2, + "learning_rate": 4.601222802882809e-05, + "loss": 3.5051, + "step": 46700 + }, + { + "epoch": 1.2, + "learning_rate": 4.600368890255149e-05, + "loss": 3.009, + "step": 46800 + }, + { + "epoch": 1.2, + "learning_rate": 4.5995149776274895e-05, + "loss": 3.1782, + "step": 46900 + }, + { + "epoch": 1.2, + "learning_rate": 4.5986610649998296e-05, + "loss": 3.0154, + "step": 47000 + }, + { + "epoch": 1.21, + "learning_rate": 4.597807152372169e-05, + "loss": 3.3357, + "step": 47100 + }, + { + "epoch": 1.21, + "learning_rate": 4.59695323974451e-05, + "loss": 3.2224, + "step": 47200 + }, + { + "epoch": 1.21, + "learning_rate": 4.5960993271168494e-05, + "loss": 3.1395, + "step": 47300 + }, + { + "epoch": 1.21, + "learning_rate": 4.5952454144891896e-05, + "loss": 3.4303, + "step": 47400 + }, + { + "epoch": 1.22, + "learning_rate": 4.59439150186153e-05, + "loss": 3.1074, + "step": 47500 + }, + { + "epoch": 1.22, + "learning_rate": 4.59353758923387e-05, + "loss": 3.3658, + "step": 47600 + }, + { + "epoch": 1.22, + "learning_rate": 4.59268367660621e-05, + "loss": 3.3173, + "step": 47700 + }, + { + "epoch": 1.22, + "learning_rate": 4.59182976397855e-05, + "loss": 2.9707, + "step": 47800 + }, + { + "epoch": 1.23, + "learning_rate": 4.59097585135089e-05, + "loss": 3.3756, + "step": 47900 + }, + { + "epoch": 1.23, + "learning_rate": 4.5901219387232305e-05, + "loss": 3.0612, + "step": 48000 + }, + { + "epoch": 1.23, + "learning_rate": 4.58926802609557e-05, + "loss": 3.1551, + "step": 48100 + }, + { + "epoch": 1.23, + "learning_rate": 4.58841411346791e-05, + "loss": 3.3508, + "step": 48200 + }, + { + "epoch": 1.24, + "learning_rate": 4.58756020084025e-05, + "loss": 3.3484, + "step": 48300 + }, + { + "epoch": 1.24, + "learning_rate": 4.5867062882125904e-05, + "loss": 3.2109, + "step": 48400 + }, + { + "epoch": 1.24, + "learning_rate": 4.5858523755849306e-05, + "loss": 3.2551, + "step": 48500 + }, + { + "epoch": 1.25, + "learning_rate": 4.584998462957271e-05, + "loss": 3.1801, + "step": 48600 + }, + { + "epoch": 1.25, + "learning_rate": 4.58414455032961e-05, + "loss": 3.2943, + "step": 48700 + }, + { + "epoch": 1.25, + "learning_rate": 4.583290637701951e-05, + "loss": 3.1567, + "step": 48800 + }, + { + "epoch": 1.25, + "learning_rate": 4.5824367250742905e-05, + "loss": 3.6534, + "step": 48900 + }, + { + "epoch": 1.26, + "learning_rate": 4.581582812446631e-05, + "loss": 3.2714, + "step": 49000 + }, + { + "epoch": 1.26, + "learning_rate": 4.580728899818971e-05, + "loss": 3.2884, + "step": 49100 + }, + { + "epoch": 1.26, + "learning_rate": 4.579874987191311e-05, + "loss": 3.0826, + "step": 49200 + }, + { + "epoch": 1.26, + "learning_rate": 4.5790210745636505e-05, + "loss": 3.2807, + "step": 49300 + }, + { + "epoch": 1.27, + "learning_rate": 4.578167161935991e-05, + "loss": 3.3775, + "step": 49400 + }, + { + "epoch": 1.27, + "learning_rate": 4.577313249308331e-05, + "loss": 3.0698, + "step": 49500 + }, + { + "epoch": 1.27, + "learning_rate": 4.576459336680671e-05, + "loss": 3.299, + "step": 49600 + }, + { + "epoch": 1.27, + "learning_rate": 4.575605424053011e-05, + "loss": 3.1239, + "step": 49700 + }, + { + "epoch": 1.28, + "learning_rate": 4.574751511425351e-05, + "loss": 3.2043, + "step": 49800 + }, + { + "epoch": 1.28, + "learning_rate": 4.5738975987976914e-05, + "loss": 3.3373, + "step": 49900 + }, + { + "epoch": 1.28, + "learning_rate": 4.5730436861700316e-05, + "loss": 3.6232, + "step": 50000 + }, + { + "epoch": 1.28, + "learning_rate": 4.572189773542371e-05, + "loss": 3.1563, + "step": 50100 + }, + { + "epoch": 1.29, + "learning_rate": 4.571335860914712e-05, + "loss": 3.2558, + "step": 50200 + }, + { + "epoch": 1.29, + "learning_rate": 4.5704819482870514e-05, + "loss": 3.2061, + "step": 50300 + }, + { + "epoch": 1.29, + "learning_rate": 4.5696280356593915e-05, + "loss": 3.1413, + "step": 50400 + }, + { + "epoch": 1.29, + "learning_rate": 4.568774123031732e-05, + "loss": 3.1439, + "step": 50500 + }, + { + "epoch": 1.3, + "learning_rate": 4.567920210404072e-05, + "loss": 3.4335, + "step": 50600 + }, + { + "epoch": 1.3, + "learning_rate": 4.567066297776412e-05, + "loss": 3.3769, + "step": 50700 + }, + { + "epoch": 1.3, + "learning_rate": 4.566212385148752e-05, + "loss": 3.2077, + "step": 50800 + }, + { + "epoch": 1.3, + "learning_rate": 4.5653584725210916e-05, + "loss": 3.2767, + "step": 50900 + }, + { + "epoch": 1.31, + "learning_rate": 4.5645045598934324e-05, + "loss": 3.3502, + "step": 51000 + }, + { + "epoch": 1.31, + "learning_rate": 4.563650647265772e-05, + "loss": 3.2704, + "step": 51100 + }, + { + "epoch": 1.31, + "learning_rate": 4.562796734638112e-05, + "loss": 3.3187, + "step": 51200 + }, + { + "epoch": 1.31, + "learning_rate": 4.561942822010452e-05, + "loss": 3.1367, + "step": 51300 + }, + { + "epoch": 1.32, + "learning_rate": 4.5610889093827924e-05, + "loss": 3.3311, + "step": 51400 + }, + { + "epoch": 1.32, + "learning_rate": 4.5602349967551325e-05, + "loss": 3.2221, + "step": 51500 + }, + { + "epoch": 1.32, + "learning_rate": 4.559381084127473e-05, + "loss": 3.3227, + "step": 51600 + }, + { + "epoch": 1.32, + "learning_rate": 4.558527171499812e-05, + "loss": 3.2998, + "step": 51700 + }, + { + "epoch": 1.33, + "learning_rate": 4.557673258872153e-05, + "loss": 3.3506, + "step": 51800 + }, + { + "epoch": 1.33, + "learning_rate": 4.5568193462444925e-05, + "loss": 3.3649, + "step": 51900 + }, + { + "epoch": 1.33, + "learning_rate": 4.5559654336168326e-05, + "loss": 3.3714, + "step": 52000 + }, + { + "epoch": 1.33, + "learning_rate": 4.555111520989173e-05, + "loss": 3.0725, + "step": 52100 + }, + { + "epoch": 1.34, + "learning_rate": 4.554257608361513e-05, + "loss": 3.2297, + "step": 52200 + }, + { + "epoch": 1.34, + "learning_rate": 4.5534036957338524e-05, + "loss": 3.3677, + "step": 52300 + }, + { + "epoch": 1.34, + "learning_rate": 4.552549783106193e-05, + "loss": 3.2325, + "step": 52400 + }, + { + "epoch": 1.34, + "learning_rate": 4.551695870478533e-05, + "loss": 3.142, + "step": 52500 + }, + { + "epoch": 1.35, + "learning_rate": 4.550841957850873e-05, + "loss": 3.1671, + "step": 52600 + }, + { + "epoch": 1.35, + "learning_rate": 4.549988045223213e-05, + "loss": 3.285, + "step": 52700 + }, + { + "epoch": 1.35, + "learning_rate": 4.5491341325955525e-05, + "loss": 3.0934, + "step": 52800 + }, + { + "epoch": 1.36, + "learning_rate": 4.5482802199678934e-05, + "loss": 3.3774, + "step": 52900 + }, + { + "epoch": 1.36, + "learning_rate": 4.547426307340233e-05, + "loss": 3.4387, + "step": 53000 + }, + { + "epoch": 1.36, + "learning_rate": 4.546572394712573e-05, + "loss": 3.2799, + "step": 53100 + }, + { + "epoch": 1.36, + "learning_rate": 4.545718482084913e-05, + "loss": 3.4177, + "step": 53200 + }, + { + "epoch": 1.37, + "learning_rate": 4.544864569457253e-05, + "loss": 3.248, + "step": 53300 + }, + { + "epoch": 1.37, + "learning_rate": 4.5440106568295934e-05, + "loss": 3.2275, + "step": 53400 + }, + { + "epoch": 1.37, + "learning_rate": 4.5431567442019336e-05, + "loss": 3.1949, + "step": 53500 + }, + { + "epoch": 1.37, + "learning_rate": 4.542302831574273e-05, + "loss": 3.3461, + "step": 53600 + }, + { + "epoch": 1.38, + "learning_rate": 4.541448918946614e-05, + "loss": 3.0244, + "step": 53700 + }, + { + "epoch": 1.38, + "learning_rate": 4.5405950063189534e-05, + "loss": 3.3828, + "step": 53800 + }, + { + "epoch": 1.38, + "learning_rate": 4.5397410936912935e-05, + "loss": 3.2586, + "step": 53900 + }, + { + "epoch": 1.38, + "learning_rate": 4.538887181063634e-05, + "loss": 3.2267, + "step": 54000 + }, + { + "epoch": 1.39, + "learning_rate": 4.538033268435974e-05, + "loss": 3.0411, + "step": 54100 + }, + { + "epoch": 1.39, + "learning_rate": 4.537179355808313e-05, + "loss": 3.4893, + "step": 54200 + }, + { + "epoch": 1.39, + "learning_rate": 4.536325443180654e-05, + "loss": 3.1938, + "step": 54300 + }, + { + "epoch": 1.39, + "learning_rate": 4.5354715305529936e-05, + "loss": 3.0352, + "step": 54400 + }, + { + "epoch": 1.4, + "learning_rate": 4.534617617925334e-05, + "loss": 3.1959, + "step": 54500 + }, + { + "epoch": 1.4, + "learning_rate": 4.533763705297674e-05, + "loss": 3.2033, + "step": 54600 + }, + { + "epoch": 1.4, + "learning_rate": 4.532909792670014e-05, + "loss": 3.2207, + "step": 54700 + }, + { + "epoch": 1.4, + "learning_rate": 4.532055880042354e-05, + "loss": 3.1523, + "step": 54800 + }, + { + "epoch": 1.41, + "learning_rate": 4.5312019674146944e-05, + "loss": 3.099, + "step": 54900 + }, + { + "epoch": 1.41, + "learning_rate": 4.530348054787034e-05, + "loss": 3.3705, + "step": 55000 + }, + { + "epoch": 1.41, + "learning_rate": 4.529494142159375e-05, + "loss": 3.1773, + "step": 55100 + }, + { + "epoch": 1.41, + "learning_rate": 4.528640229531714e-05, + "loss": 3.076, + "step": 55200 + }, + { + "epoch": 1.42, + "learning_rate": 4.5277863169040544e-05, + "loss": 3.1966, + "step": 55300 + }, + { + "epoch": 1.42, + "learning_rate": 4.5269324042763945e-05, + "loss": 3.1723, + "step": 55400 + }, + { + "epoch": 1.42, + "learning_rate": 4.526078491648735e-05, + "loss": 3.3688, + "step": 55500 + }, + { + "epoch": 1.42, + "learning_rate": 4.525224579021075e-05, + "loss": 3.241, + "step": 55600 + }, + { + "epoch": 1.43, + "learning_rate": 4.524370666393415e-05, + "loss": 3.3047, + "step": 55700 + }, + { + "epoch": 1.43, + "learning_rate": 4.5235167537657545e-05, + "loss": 2.9904, + "step": 55800 + }, + { + "epoch": 1.43, + "learning_rate": 4.522662841138095e-05, + "loss": 3.1969, + "step": 55900 + }, + { + "epoch": 1.43, + "learning_rate": 4.521808928510435e-05, + "loss": 3.1953, + "step": 56000 + }, + { + "epoch": 1.44, + "learning_rate": 4.520955015882775e-05, + "loss": 3.1532, + "step": 56100 + }, + { + "epoch": 1.44, + "learning_rate": 4.520101103255115e-05, + "loss": 3.2045, + "step": 56200 + }, + { + "epoch": 1.44, + "learning_rate": 4.519247190627455e-05, + "loss": 3.2981, + "step": 56300 + }, + { + "epoch": 1.44, + "learning_rate": 4.5183932779997954e-05, + "loss": 3.3037, + "step": 56400 + }, + { + "epoch": 1.45, + "learning_rate": 4.5175393653721355e-05, + "loss": 3.1982, + "step": 56500 + }, + { + "epoch": 1.45, + "learning_rate": 4.516685452744475e-05, + "loss": 3.2365, + "step": 56600 + }, + { + "epoch": 1.45, + "learning_rate": 4.515831540116816e-05, + "loss": 3.2118, + "step": 56700 + }, + { + "epoch": 1.46, + "learning_rate": 4.514977627489155e-05, + "loss": 3.2382, + "step": 56800 + }, + { + "epoch": 1.46, + "learning_rate": 4.5141237148614955e-05, + "loss": 3.1484, + "step": 56900 + }, + { + "epoch": 1.46, + "learning_rate": 4.5132698022338356e-05, + "loss": 3.255, + "step": 57000 + }, + { + "epoch": 1.46, + "learning_rate": 4.512415889606176e-05, + "loss": 3.3042, + "step": 57100 + }, + { + "epoch": 1.47, + "learning_rate": 4.511561976978515e-05, + "loss": 3.1508, + "step": 57200 + }, + { + "epoch": 1.47, + "learning_rate": 4.510708064350856e-05, + "loss": 3.1394, + "step": 57300 + }, + { + "epoch": 1.47, + "learning_rate": 4.5098541517231956e-05, + "loss": 3.1828, + "step": 57400 + }, + { + "epoch": 1.47, + "learning_rate": 4.509000239095536e-05, + "loss": 3.2893, + "step": 57500 + }, + { + "epoch": 1.48, + "learning_rate": 4.508146326467876e-05, + "loss": 2.9791, + "step": 57600 + }, + { + "epoch": 1.48, + "learning_rate": 4.507292413840216e-05, + "loss": 3.1324, + "step": 57700 + }, + { + "epoch": 1.48, + "learning_rate": 4.506438501212556e-05, + "loss": 3.1465, + "step": 57800 + }, + { + "epoch": 1.48, + "learning_rate": 4.5055845885848963e-05, + "loss": 3.172, + "step": 57900 + }, + { + "epoch": 1.49, + "learning_rate": 4.504730675957236e-05, + "loss": 3.2903, + "step": 58000 + }, + { + "epoch": 1.49, + "learning_rate": 4.5038767633295767e-05, + "loss": 3.2377, + "step": 58100 + }, + { + "epoch": 1.49, + "learning_rate": 4.503022850701916e-05, + "loss": 3.0679, + "step": 58200 + }, + { + "epoch": 1.49, + "learning_rate": 4.502168938074256e-05, + "loss": 3.3194, + "step": 58300 + }, + { + "epoch": 1.5, + "learning_rate": 4.5013150254465964e-05, + "loss": 3.2289, + "step": 58400 + }, + { + "epoch": 1.5, + "learning_rate": 4.5004611128189366e-05, + "loss": 3.1449, + "step": 58500 + }, + { + "epoch": 1.5, + "learning_rate": 4.499607200191277e-05, + "loss": 3.3433, + "step": 58600 + }, + { + "epoch": 1.5, + "learning_rate": 4.498753287563617e-05, + "loss": 3.3692, + "step": 58700 + }, + { + "epoch": 1.51, + "learning_rate": 4.4978993749359564e-05, + "loss": 3.2493, + "step": 58800 + }, + { + "epoch": 1.51, + "learning_rate": 4.497045462308297e-05, + "loss": 3.2536, + "step": 58900 + }, + { + "epoch": 1.51, + "learning_rate": 4.496191549680637e-05, + "loss": 3.2131, + "step": 59000 + }, + { + "epoch": 1.51, + "learning_rate": 4.495337637052977e-05, + "loss": 2.9688, + "step": 59100 + }, + { + "epoch": 1.52, + "learning_rate": 4.494483724425317e-05, + "loss": 3.3731, + "step": 59200 + }, + { + "epoch": 1.52, + "learning_rate": 4.493629811797657e-05, + "loss": 3.2793, + "step": 59300 + }, + { + "epoch": 1.52, + "learning_rate": 4.4927758991699966e-05, + "loss": 3.1691, + "step": 59400 + }, + { + "epoch": 1.52, + "learning_rate": 4.4919219865423375e-05, + "loss": 3.3579, + "step": 59500 + }, + { + "epoch": 1.53, + "learning_rate": 4.491068073914677e-05, + "loss": 3.3206, + "step": 59600 + }, + { + "epoch": 1.53, + "learning_rate": 4.490214161287017e-05, + "loss": 3.1517, + "step": 59700 + }, + { + "epoch": 1.53, + "learning_rate": 4.489360248659357e-05, + "loss": 3.3422, + "step": 59800 + }, + { + "epoch": 1.53, + "learning_rate": 4.4885063360316974e-05, + "loss": 3.2501, + "step": 59900 + }, + { + "epoch": 1.54, + "learning_rate": 4.4876524234040376e-05, + "loss": 3.0996, + "step": 60000 + }, + { + "epoch": 1.54, + "learning_rate": 4.486798510776378e-05, + "loss": 3.4043, + "step": 60100 + }, + { + "epoch": 1.54, + "learning_rate": 4.485944598148717e-05, + "loss": 3.2396, + "step": 60200 + }, + { + "epoch": 1.54, + "learning_rate": 4.485090685521058e-05, + "loss": 3.0377, + "step": 60300 + }, + { + "epoch": 1.55, + "learning_rate": 4.4842367728933975e-05, + "loss": 3.2877, + "step": 60400 + }, + { + "epoch": 1.55, + "learning_rate": 4.483382860265738e-05, + "loss": 3.2888, + "step": 60500 + }, + { + "epoch": 1.55, + "learning_rate": 4.482528947638078e-05, + "loss": 3.2973, + "step": 60600 + }, + { + "epoch": 1.55, + "learning_rate": 4.481675035010418e-05, + "loss": 3.2159, + "step": 60700 + }, + { + "epoch": 1.56, + "learning_rate": 4.480821122382758e-05, + "loss": 3.3014, + "step": 60800 + }, + { + "epoch": 1.56, + "learning_rate": 4.479967209755098e-05, + "loss": 3.0788, + "step": 60900 + }, + { + "epoch": 1.56, + "learning_rate": 4.479113297127438e-05, + "loss": 3.1098, + "step": 61000 + }, + { + "epoch": 1.57, + "learning_rate": 4.4782593844997786e-05, + "loss": 3.3015, + "step": 61100 + }, + { + "epoch": 1.57, + "learning_rate": 4.477405471872118e-05, + "loss": 3.1981, + "step": 61200 + }, + { + "epoch": 1.57, + "learning_rate": 4.476551559244458e-05, + "loss": 3.1165, + "step": 61300 + }, + { + "epoch": 1.57, + "learning_rate": 4.4756976466167984e-05, + "loss": 3.1384, + "step": 61400 + }, + { + "epoch": 1.58, + "learning_rate": 4.4748437339891385e-05, + "loss": 3.2794, + "step": 61500 + }, + { + "epoch": 1.58, + "learning_rate": 4.473989821361479e-05, + "loss": 3.3021, + "step": 61600 + }, + { + "epoch": 1.58, + "learning_rate": 4.473135908733819e-05, + "loss": 3.4168, + "step": 61700 + }, + { + "epoch": 1.58, + "learning_rate": 4.472281996106158e-05, + "loss": 3.1338, + "step": 61800 + }, + { + "epoch": 1.59, + "learning_rate": 4.471428083478499e-05, + "loss": 3.3508, + "step": 61900 + }, + { + "epoch": 1.59, + "learning_rate": 4.4705741708508386e-05, + "loss": 3.1625, + "step": 62000 + }, + { + "epoch": 1.59, + "learning_rate": 4.469720258223179e-05, + "loss": 3.3888, + "step": 62100 + }, + { + "epoch": 1.59, + "learning_rate": 4.468866345595519e-05, + "loss": 3.2584, + "step": 62200 + }, + { + "epoch": 1.6, + "learning_rate": 4.468012432967859e-05, + "loss": 3.3002, + "step": 62300 + }, + { + "epoch": 1.6, + "learning_rate": 4.4671585203401986e-05, + "loss": 3.173, + "step": 62400 + }, + { + "epoch": 1.6, + "learning_rate": 4.4663046077125394e-05, + "loss": 3.0448, + "step": 62500 + }, + { + "epoch": 1.6, + "learning_rate": 4.465450695084879e-05, + "loss": 3.0071, + "step": 62600 + }, + { + "epoch": 1.61, + "learning_rate": 4.464596782457219e-05, + "loss": 3.3659, + "step": 62700 + }, + { + "epoch": 1.61, + "learning_rate": 4.463742869829559e-05, + "loss": 3.1854, + "step": 62800 + }, + { + "epoch": 1.61, + "learning_rate": 4.4628889572018993e-05, + "loss": 3.3044, + "step": 62900 + }, + { + "epoch": 1.61, + "learning_rate": 4.4620350445742395e-05, + "loss": 3.2739, + "step": 63000 + }, + { + "epoch": 1.62, + "learning_rate": 4.4611811319465797e-05, + "loss": 3.2855, + "step": 63100 + }, + { + "epoch": 1.62, + "learning_rate": 4.460327219318919e-05, + "loss": 3.0983, + "step": 63200 + }, + { + "epoch": 1.62, + "learning_rate": 4.45947330669126e-05, + "loss": 3.1033, + "step": 63300 + }, + { + "epoch": 1.62, + "learning_rate": 4.4586193940635994e-05, + "loss": 3.0957, + "step": 63400 + }, + { + "epoch": 1.63, + "learning_rate": 4.4577654814359396e-05, + "loss": 3.0914, + "step": 63500 + }, + { + "epoch": 1.63, + "learning_rate": 4.45691156880828e-05, + "loss": 3.1328, + "step": 63600 + }, + { + "epoch": 1.63, + "learning_rate": 4.45605765618062e-05, + "loss": 3.1602, + "step": 63700 + }, + { + "epoch": 1.63, + "learning_rate": 4.45520374355296e-05, + "loss": 3.1097, + "step": 63800 + }, + { + "epoch": 1.64, + "learning_rate": 4.4543498309253e-05, + "loss": 3.0696, + "step": 63900 + }, + { + "epoch": 1.64, + "learning_rate": 4.45349591829764e-05, + "loss": 3.0489, + "step": 64000 + }, + { + "epoch": 1.64, + "learning_rate": 4.4526420056699805e-05, + "loss": 3.027, + "step": 64100 + }, + { + "epoch": 1.64, + "learning_rate": 4.45178809304232e-05, + "loss": 3.2089, + "step": 64200 + }, + { + "epoch": 1.65, + "learning_rate": 4.45093418041466e-05, + "loss": 3.1203, + "step": 64300 + }, + { + "epoch": 1.65, + "learning_rate": 4.450080267787e-05, + "loss": 3.0265, + "step": 64400 + }, + { + "epoch": 1.65, + "learning_rate": 4.4492263551593405e-05, + "loss": 3.2565, + "step": 64500 + }, + { + "epoch": 1.65, + "learning_rate": 4.4483724425316806e-05, + "loss": 2.9759, + "step": 64600 + }, + { + "epoch": 1.66, + "learning_rate": 4.447518529904021e-05, + "loss": 3.3455, + "step": 64700 + }, + { + "epoch": 1.66, + "learning_rate": 4.44666461727636e-05, + "loss": 3.1068, + "step": 64800 + }, + { + "epoch": 1.66, + "learning_rate": 4.445810704648701e-05, + "loss": 3.1791, + "step": 64900 + }, + { + "epoch": 1.67, + "learning_rate": 4.4449567920210406e-05, + "loss": 3.1656, + "step": 65000 + }, + { + "epoch": 1.67, + "learning_rate": 4.444102879393381e-05, + "loss": 3.235, + "step": 65100 + }, + { + "epoch": 1.67, + "learning_rate": 4.443248966765721e-05, + "loss": 3.0505, + "step": 65200 + }, + { + "epoch": 1.67, + "learning_rate": 4.442395054138061e-05, + "loss": 3.2848, + "step": 65300 + }, + { + "epoch": 1.68, + "learning_rate": 4.4415411415104005e-05, + "loss": 3.0479, + "step": 65400 + }, + { + "epoch": 1.68, + "learning_rate": 4.440687228882741e-05, + "loss": 3.2758, + "step": 65500 + }, + { + "epoch": 1.68, + "learning_rate": 4.439833316255081e-05, + "loss": 3.0013, + "step": 65600 + }, + { + "epoch": 1.68, + "learning_rate": 4.438979403627421e-05, + "loss": 3.1324, + "step": 65700 + }, + { + "epoch": 1.69, + "learning_rate": 4.438125490999761e-05, + "loss": 3.0407, + "step": 65800 + }, + { + "epoch": 1.69, + "learning_rate": 4.437271578372101e-05, + "loss": 3.2393, + "step": 65900 + }, + { + "epoch": 1.69, + "learning_rate": 4.4364176657444414e-05, + "loss": 3.1361, + "step": 66000 + }, + { + "epoch": 1.69, + "learning_rate": 4.4355637531167816e-05, + "loss": 3.2417, + "step": 66100 + }, + { + "epoch": 1.7, + "learning_rate": 4.434709840489121e-05, + "loss": 3.2056, + "step": 66200 + }, + { + "epoch": 1.7, + "learning_rate": 4.433855927861462e-05, + "loss": 3.2976, + "step": 66300 + }, + { + "epoch": 1.7, + "learning_rate": 4.4330020152338014e-05, + "loss": 3.2442, + "step": 66400 + }, + { + "epoch": 1.7, + "learning_rate": 4.4321481026061415e-05, + "loss": 3.1343, + "step": 66500 + }, + { + "epoch": 1.71, + "learning_rate": 4.431294189978482e-05, + "loss": 3.213, + "step": 66600 + }, + { + "epoch": 1.71, + "learning_rate": 4.430440277350822e-05, + "loss": 3.1263, + "step": 66700 + }, + { + "epoch": 1.71, + "learning_rate": 4.429586364723162e-05, + "loss": 3.2514, + "step": 66800 + }, + { + "epoch": 1.71, + "learning_rate": 4.428732452095502e-05, + "loss": 3.1529, + "step": 66900 + }, + { + "epoch": 1.72, + "learning_rate": 4.4278785394678416e-05, + "loss": 3.1256, + "step": 67000 + }, + { + "epoch": 1.72, + "learning_rate": 4.4270246268401825e-05, + "loss": 3.3028, + "step": 67100 + }, + { + "epoch": 1.72, + "learning_rate": 4.426170714212522e-05, + "loss": 2.9583, + "step": 67200 + }, + { + "epoch": 1.72, + "learning_rate": 4.425316801584862e-05, + "loss": 3.3054, + "step": 67300 + }, + { + "epoch": 1.73, + "learning_rate": 4.424462888957202e-05, + "loss": 3.3126, + "step": 67400 + }, + { + "epoch": 1.73, + "learning_rate": 4.4236089763295424e-05, + "loss": 3.0507, + "step": 67500 + }, + { + "epoch": 1.73, + "learning_rate": 4.422755063701882e-05, + "loss": 3.1097, + "step": 67600 + }, + { + "epoch": 1.73, + "learning_rate": 4.421901151074223e-05, + "loss": 2.932, + "step": 67700 + }, + { + "epoch": 1.74, + "learning_rate": 4.421047238446562e-05, + "loss": 3.1756, + "step": 67800 + }, + { + "epoch": 1.74, + "learning_rate": 4.4201933258189023e-05, + "loss": 3.126, + "step": 67900 + }, + { + "epoch": 1.74, + "learning_rate": 4.4193394131912425e-05, + "loss": 3.154, + "step": 68000 + }, + { + "epoch": 1.74, + "learning_rate": 4.4184855005635827e-05, + "loss": 3.1391, + "step": 68100 + }, + { + "epoch": 1.75, + "learning_rate": 4.417631587935923e-05, + "loss": 3.2438, + "step": 68200 + }, + { + "epoch": 1.75, + "learning_rate": 4.416777675308263e-05, + "loss": 3.1666, + "step": 68300 + }, + { + "epoch": 1.75, + "learning_rate": 4.4159237626806024e-05, + "loss": 3.1894, + "step": 68400 + }, + { + "epoch": 1.75, + "learning_rate": 4.415069850052943e-05, + "loss": 3.1682, + "step": 68500 + }, + { + "epoch": 1.76, + "learning_rate": 4.414215937425283e-05, + "loss": 3.1575, + "step": 68600 + }, + { + "epoch": 1.76, + "learning_rate": 4.413362024797623e-05, + "loss": 2.9601, + "step": 68700 + }, + { + "epoch": 1.76, + "learning_rate": 4.412508112169963e-05, + "loss": 3.2788, + "step": 68800 + }, + { + "epoch": 1.77, + "learning_rate": 4.411654199542303e-05, + "loss": 3.3421, + "step": 68900 + }, + { + "epoch": 1.77, + "learning_rate": 4.4108002869146434e-05, + "loss": 3.0026, + "step": 69000 + }, + { + "epoch": 1.77, + "learning_rate": 4.4099463742869835e-05, + "loss": 3.2379, + "step": 69100 + }, + { + "epoch": 1.77, + "learning_rate": 4.409092461659323e-05, + "loss": 3.2624, + "step": 69200 + }, + { + "epoch": 1.78, + "learning_rate": 4.408238549031664e-05, + "loss": 3.0439, + "step": 69300 + }, + { + "epoch": 1.78, + "learning_rate": 4.407384636404003e-05, + "loss": 3.2517, + "step": 69400 + }, + { + "epoch": 1.78, + "learning_rate": 4.4065307237763435e-05, + "loss": 3.259, + "step": 69500 + }, + { + "epoch": 1.78, + "learning_rate": 4.4056768111486836e-05, + "loss": 3.1286, + "step": 69600 + }, + { + "epoch": 1.79, + "learning_rate": 4.404822898521024e-05, + "loss": 3.2362, + "step": 69700 + }, + { + "epoch": 1.79, + "learning_rate": 4.403968985893364e-05, + "loss": 3.1902, + "step": 69800 + }, + { + "epoch": 1.79, + "learning_rate": 4.403115073265704e-05, + "loss": 3.4516, + "step": 69900 + }, + { + "epoch": 1.79, + "learning_rate": 4.4022611606380436e-05, + "loss": 3.0569, + "step": 70000 + }, + { + "epoch": 1.8, + "learning_rate": 4.4014072480103844e-05, + "loss": 3.2138, + "step": 70100 + }, + { + "epoch": 1.8, + "learning_rate": 4.400553335382724e-05, + "loss": 3.1757, + "step": 70200 + }, + { + "epoch": 1.8, + "learning_rate": 4.3996994227550633e-05, + "loss": 3.2162, + "step": 70300 + }, + { + "epoch": 1.8, + "learning_rate": 4.398845510127404e-05, + "loss": 3.2975, + "step": 70400 + }, + { + "epoch": 1.81, + "learning_rate": 4.3979915974997437e-05, + "loss": 3.1081, + "step": 70500 + }, + { + "epoch": 1.81, + "learning_rate": 4.397137684872084e-05, + "loss": 3.1195, + "step": 70600 + }, + { + "epoch": 1.81, + "learning_rate": 4.396283772244424e-05, + "loss": 3.2665, + "step": 70700 + }, + { + "epoch": 1.81, + "learning_rate": 4.395429859616764e-05, + "loss": 3.125, + "step": 70800 + }, + { + "epoch": 1.82, + "learning_rate": 4.394575946989104e-05, + "loss": 3.2652, + "step": 70900 + }, + { + "epoch": 1.82, + "learning_rate": 4.3937220343614444e-05, + "loss": 3.0792, + "step": 71000 + }, + { + "epoch": 1.82, + "learning_rate": 4.392868121733784e-05, + "loss": 3.2215, + "step": 71100 + }, + { + "epoch": 1.82, + "learning_rate": 4.392014209106125e-05, + "loss": 2.9989, + "step": 71200 + }, + { + "epoch": 1.83, + "learning_rate": 4.391160296478464e-05, + "loss": 3.2054, + "step": 71300 + }, + { + "epoch": 1.83, + "learning_rate": 4.3903063838508044e-05, + "loss": 3.1994, + "step": 71400 + }, + { + "epoch": 1.83, + "learning_rate": 4.3894524712231445e-05, + "loss": 3.0047, + "step": 71500 + }, + { + "epoch": 1.83, + "learning_rate": 4.388598558595485e-05, + "loss": 2.9717, + "step": 71600 + }, + { + "epoch": 1.84, + "learning_rate": 4.387744645967825e-05, + "loss": 3.1778, + "step": 71700 + }, + { + "epoch": 1.84, + "learning_rate": 4.386890733340165e-05, + "loss": 3.1838, + "step": 71800 + }, + { + "epoch": 1.84, + "learning_rate": 4.3860368207125045e-05, + "loss": 3.0509, + "step": 71900 + }, + { + "epoch": 1.84, + "learning_rate": 4.385182908084845e-05, + "loss": 3.0945, + "step": 72000 + }, + { + "epoch": 1.85, + "learning_rate": 4.384328995457185e-05, + "loss": 3.2435, + "step": 72100 + }, + { + "epoch": 1.85, + "learning_rate": 4.383475082829525e-05, + "loss": 3.134, + "step": 72200 + }, + { + "epoch": 1.85, + "learning_rate": 4.382621170201865e-05, + "loss": 3.4882, + "step": 72300 + }, + { + "epoch": 1.85, + "learning_rate": 4.381767257574205e-05, + "loss": 3.4011, + "step": 72400 + }, + { + "epoch": 1.86, + "learning_rate": 4.380913344946545e-05, + "loss": 3.1124, + "step": 72500 + }, + { + "epoch": 1.86, + "learning_rate": 4.3800594323188856e-05, + "loss": 3.2966, + "step": 72600 + }, + { + "epoch": 1.86, + "learning_rate": 4.379205519691225e-05, + "loss": 3.3746, + "step": 72700 + }, + { + "epoch": 1.86, + "learning_rate": 4.378351607063565e-05, + "loss": 3.085, + "step": 72800 + }, + { + "epoch": 1.87, + "learning_rate": 4.3774976944359053e-05, + "loss": 3.1161, + "step": 72900 + }, + { + "epoch": 1.87, + "learning_rate": 4.3766437818082455e-05, + "loss": 3.0355, + "step": 73000 + }, + { + "epoch": 1.87, + "learning_rate": 4.3757898691805857e-05, + "loss": 3.1345, + "step": 73100 + }, + { + "epoch": 1.88, + "learning_rate": 4.374935956552926e-05, + "loss": 3.2653, + "step": 73200 + }, + { + "epoch": 1.88, + "learning_rate": 4.374082043925265e-05, + "loss": 3.0472, + "step": 73300 + }, + { + "epoch": 1.88, + "learning_rate": 4.373228131297606e-05, + "loss": 3.0984, + "step": 73400 + }, + { + "epoch": 1.88, + "learning_rate": 4.3723742186699456e-05, + "loss": 3.2532, + "step": 73500 + }, + { + "epoch": 1.89, + "learning_rate": 4.371520306042286e-05, + "loss": 3.2685, + "step": 73600 + }, + { + "epoch": 1.89, + "learning_rate": 4.370666393414626e-05, + "loss": 3.2064, + "step": 73700 + }, + { + "epoch": 1.89, + "learning_rate": 4.369812480786966e-05, + "loss": 2.9865, + "step": 73800 + }, + { + "epoch": 1.89, + "learning_rate": 4.368958568159306e-05, + "loss": 3.0411, + "step": 73900 + }, + { + "epoch": 1.9, + "learning_rate": 4.3681046555316464e-05, + "loss": 3.0619, + "step": 74000 + }, + { + "epoch": 1.9, + "learning_rate": 4.367250742903986e-05, + "loss": 3.271, + "step": 74100 + }, + { + "epoch": 1.9, + "learning_rate": 4.366396830276327e-05, + "loss": 3.2532, + "step": 74200 + }, + { + "epoch": 1.9, + "learning_rate": 4.365542917648666e-05, + "loss": 3.18, + "step": 74300 + }, + { + "epoch": 1.91, + "learning_rate": 4.364689005021006e-05, + "loss": 3.3295, + "step": 74400 + }, + { + "epoch": 1.91, + "learning_rate": 4.3638350923933465e-05, + "loss": 3.1967, + "step": 74500 + }, + { + "epoch": 1.91, + "learning_rate": 4.3629811797656866e-05, + "loss": 3.2007, + "step": 74600 + }, + { + "epoch": 1.91, + "learning_rate": 4.362127267138027e-05, + "loss": 3.2682, + "step": 74700 + }, + { + "epoch": 1.92, + "learning_rate": 4.361273354510367e-05, + "loss": 2.9575, + "step": 74800 + }, + { + "epoch": 1.92, + "learning_rate": 4.3604194418827064e-05, + "loss": 3.2221, + "step": 74900 + }, + { + "epoch": 1.92, + "learning_rate": 4.359565529255047e-05, + "loss": 2.9996, + "step": 75000 + }, + { + "epoch": 1.92, + "learning_rate": 4.358711616627387e-05, + "loss": 3.0046, + "step": 75100 + }, + { + "epoch": 1.93, + "learning_rate": 4.357857703999727e-05, + "loss": 3.1096, + "step": 75200 + }, + { + "epoch": 1.93, + "learning_rate": 4.357003791372067e-05, + "loss": 3.3285, + "step": 75300 + }, + { + "epoch": 1.93, + "learning_rate": 4.356149878744407e-05, + "loss": 3.1116, + "step": 75400 + }, + { + "epoch": 1.93, + "learning_rate": 4.3552959661167467e-05, + "loss": 3.2307, + "step": 75500 + }, + { + "epoch": 1.94, + "learning_rate": 4.3544420534890875e-05, + "loss": 3.3776, + "step": 75600 + }, + { + "epoch": 1.94, + "learning_rate": 4.353588140861427e-05, + "loss": 3.2924, + "step": 75700 + }, + { + "epoch": 1.94, + "learning_rate": 4.352734228233767e-05, + "loss": 3.117, + "step": 75800 + }, + { + "epoch": 1.94, + "learning_rate": 4.351880315606107e-05, + "loss": 3.2263, + "step": 75900 + }, + { + "epoch": 1.95, + "learning_rate": 4.3510264029784474e-05, + "loss": 3.0552, + "step": 76000 + }, + { + "epoch": 1.95, + "learning_rate": 4.3501724903507876e-05, + "loss": 3.142, + "step": 76100 + }, + { + "epoch": 1.95, + "learning_rate": 4.349318577723128e-05, + "loss": 3.3654, + "step": 76200 + }, + { + "epoch": 1.95, + "learning_rate": 4.348464665095467e-05, + "loss": 3.1482, + "step": 76300 + }, + { + "epoch": 1.96, + "learning_rate": 4.347610752467808e-05, + "loss": 3.1302, + "step": 76400 + }, + { + "epoch": 1.96, + "learning_rate": 4.3467568398401475e-05, + "loss": 3.2023, + "step": 76500 + }, + { + "epoch": 1.96, + "learning_rate": 4.345902927212488e-05, + "loss": 3.0512, + "step": 76600 + }, + { + "epoch": 1.96, + "learning_rate": 4.345049014584828e-05, + "loss": 3.2809, + "step": 76700 + }, + { + "epoch": 1.97, + "learning_rate": 4.344195101957168e-05, + "loss": 3.1384, + "step": 76800 + }, + { + "epoch": 1.97, + "learning_rate": 4.343341189329508e-05, + "loss": 3.2299, + "step": 76900 + }, + { + "epoch": 1.97, + "learning_rate": 4.342487276701848e-05, + "loss": 3.0812, + "step": 77000 + }, + { + "epoch": 1.98, + "learning_rate": 4.341633364074188e-05, + "loss": 3.2408, + "step": 77100 + }, + { + "epoch": 1.98, + "learning_rate": 4.3407794514465286e-05, + "loss": 3.1539, + "step": 77200 + }, + { + "epoch": 1.98, + "learning_rate": 4.339925538818868e-05, + "loss": 3.1098, + "step": 77300 + }, + { + "epoch": 1.98, + "learning_rate": 4.339071626191208e-05, + "loss": 3.066, + "step": 77400 + }, + { + "epoch": 1.99, + "learning_rate": 4.3382177135635484e-05, + "loss": 2.984, + "step": 77500 + }, + { + "epoch": 1.99, + "learning_rate": 4.3373638009358886e-05, + "loss": 3.2029, + "step": 77600 + }, + { + "epoch": 1.99, + "learning_rate": 4.336509888308228e-05, + "loss": 3.1193, + "step": 77700 + }, + { + "epoch": 1.99, + "learning_rate": 4.335655975680569e-05, + "loss": 3.193, + "step": 77800 + }, + { + "epoch": 2.0, + "learning_rate": 4.3348020630529083e-05, + "loss": 3.2723, + "step": 77900 + }, + { + "epoch": 2.0, + "learning_rate": 4.3339481504252485e-05, + "loss": 3.3588, + "step": 78000 + }, + { + "epoch": 2.0, + "learning_rate": 4.3330942377975886e-05, + "loss": 3.0575, + "step": 78100 + }, + { + "epoch": 2.0, + "learning_rate": 4.332240325169929e-05, + "loss": 2.9386, + "step": 78200 + }, + { + "epoch": 2.01, + "learning_rate": 4.331386412542269e-05, + "loss": 3.1232, + "step": 78300 + }, + { + "epoch": 2.01, + "learning_rate": 4.330532499914609e-05, + "loss": 2.9355, + "step": 78400 + }, + { + "epoch": 2.01, + "learning_rate": 4.3296785872869486e-05, + "loss": 3.0254, + "step": 78500 + }, + { + "epoch": 2.01, + "learning_rate": 4.3288246746592894e-05, + "loss": 3.2184, + "step": 78600 + }, + { + "epoch": 2.02, + "learning_rate": 4.327970762031629e-05, + "loss": 3.0451, + "step": 78700 + }, + { + "epoch": 2.02, + "learning_rate": 4.327116849403969e-05, + "loss": 3.085, + "step": 78800 + }, + { + "epoch": 2.02, + "learning_rate": 4.326262936776309e-05, + "loss": 2.9988, + "step": 78900 + }, + { + "epoch": 2.02, + "learning_rate": 4.3254090241486494e-05, + "loss": 3.0037, + "step": 79000 + }, + { + "epoch": 2.03, + "learning_rate": 4.3245551115209895e-05, + "loss": 3.1751, + "step": 79100 + }, + { + "epoch": 2.03, + "learning_rate": 4.32370119889333e-05, + "loss": 3.0905, + "step": 79200 + }, + { + "epoch": 2.03, + "learning_rate": 4.322847286265669e-05, + "loss": 3.0897, + "step": 79300 + }, + { + "epoch": 2.03, + "learning_rate": 4.32199337363801e-05, + "loss": 3.1822, + "step": 79400 + }, + { + "epoch": 2.04, + "learning_rate": 4.3211394610103495e-05, + "loss": 3.2876, + "step": 79500 + }, + { + "epoch": 2.04, + "learning_rate": 4.3202855483826896e-05, + "loss": 3.1157, + "step": 79600 + }, + { + "epoch": 2.04, + "learning_rate": 4.31943163575503e-05, + "loss": 3.1676, + "step": 79700 + }, + { + "epoch": 2.04, + "learning_rate": 4.31857772312737e-05, + "loss": 3.2256, + "step": 79800 + }, + { + "epoch": 2.05, + "learning_rate": 4.31772381049971e-05, + "loss": 3.162, + "step": 79900 + }, + { + "epoch": 2.05, + "learning_rate": 4.31686989787205e-05, + "loss": 3.0349, + "step": 80000 + }, + { + "epoch": 2.05, + "learning_rate": 4.31601598524439e-05, + "loss": 3.0997, + "step": 80100 + }, + { + "epoch": 2.05, + "learning_rate": 4.3151620726167305e-05, + "loss": 3.1964, + "step": 80200 + }, + { + "epoch": 2.06, + "learning_rate": 4.31430815998907e-05, + "loss": 3.1396, + "step": 80300 + }, + { + "epoch": 2.06, + "learning_rate": 4.31345424736141e-05, + "loss": 3.1143, + "step": 80400 + }, + { + "epoch": 2.06, + "learning_rate": 4.31260033473375e-05, + "loss": 3.0063, + "step": 80500 + }, + { + "epoch": 2.06, + "learning_rate": 4.3117464221060905e-05, + "loss": 3.2515, + "step": 80600 + }, + { + "epoch": 2.07, + "learning_rate": 4.31089250947843e-05, + "loss": 3.1233, + "step": 80700 + }, + { + "epoch": 2.07, + "learning_rate": 4.310038596850771e-05, + "loss": 2.9936, + "step": 80800 + }, + { + "epoch": 2.07, + "learning_rate": 4.30918468422311e-05, + "loss": 2.9814, + "step": 80900 + }, + { + "epoch": 2.08, + "learning_rate": 4.3083307715954504e-05, + "loss": 3.2333, + "step": 81000 + }, + { + "epoch": 2.08, + "learning_rate": 4.3074768589677906e-05, + "loss": 3.2694, + "step": 81100 + }, + { + "epoch": 2.08, + "learning_rate": 4.306622946340131e-05, + "loss": 3.0663, + "step": 81200 + }, + { + "epoch": 2.08, + "learning_rate": 4.305769033712471e-05, + "loss": 3.4595, + "step": 81300 + }, + { + "epoch": 2.09, + "learning_rate": 4.304915121084811e-05, + "loss": 3.2051, + "step": 81400 + }, + { + "epoch": 2.09, + "learning_rate": 4.3040612084571505e-05, + "loss": 3.2371, + "step": 81500 + }, + { + "epoch": 2.09, + "learning_rate": 4.3032072958294914e-05, + "loss": 3.1418, + "step": 81600 + }, + { + "epoch": 2.09, + "learning_rate": 4.302353383201831e-05, + "loss": 3.3268, + "step": 81700 + }, + { + "epoch": 2.1, + "learning_rate": 4.301499470574171e-05, + "loss": 3.1883, + "step": 81800 + }, + { + "epoch": 2.1, + "learning_rate": 4.300645557946511e-05, + "loss": 3.2198, + "step": 81900 + }, + { + "epoch": 2.1, + "learning_rate": 4.299791645318851e-05, + "loss": 3.3429, + "step": 82000 + }, + { + "epoch": 2.1, + "learning_rate": 4.2989377326911915e-05, + "loss": 3.0335, + "step": 82100 + }, + { + "epoch": 2.11, + "learning_rate": 4.2980838200635316e-05, + "loss": 3.1086, + "step": 82200 + }, + { + "epoch": 2.11, + "learning_rate": 4.297229907435871e-05, + "loss": 3.1151, + "step": 82300 + }, + { + "epoch": 2.11, + "learning_rate": 4.296375994808212e-05, + "loss": 3.1687, + "step": 82400 + }, + { + "epoch": 2.11, + "learning_rate": 4.2955220821805514e-05, + "loss": 2.9901, + "step": 82500 + }, + { + "epoch": 2.12, + "learning_rate": 4.2946681695528916e-05, + "loss": 3.3057, + "step": 82600 + }, + { + "epoch": 2.12, + "learning_rate": 4.293814256925232e-05, + "loss": 3.1803, + "step": 82700 + }, + { + "epoch": 2.12, + "learning_rate": 4.292960344297572e-05, + "loss": 3.0191, + "step": 82800 + }, + { + "epoch": 2.12, + "learning_rate": 4.292106431669911e-05, + "loss": 3.1051, + "step": 82900 + }, + { + "epoch": 2.13, + "learning_rate": 4.291252519042252e-05, + "loss": 3.2292, + "step": 83000 + }, + { + "epoch": 2.13, + "learning_rate": 4.2903986064145916e-05, + "loss": 3.0433, + "step": 83100 + }, + { + "epoch": 2.13, + "learning_rate": 4.289544693786932e-05, + "loss": 3.1835, + "step": 83200 + }, + { + "epoch": 2.13, + "learning_rate": 4.288690781159272e-05, + "loss": 3.3649, + "step": 83300 + }, + { + "epoch": 2.14, + "learning_rate": 4.287836868531612e-05, + "loss": 3.0549, + "step": 83400 + }, + { + "epoch": 2.14, + "learning_rate": 4.286982955903952e-05, + "loss": 3.159, + "step": 83500 + }, + { + "epoch": 2.14, + "learning_rate": 4.2861290432762924e-05, + "loss": 3.0583, + "step": 83600 + }, + { + "epoch": 2.14, + "learning_rate": 4.285275130648632e-05, + "loss": 3.1267, + "step": 83700 + }, + { + "epoch": 2.15, + "learning_rate": 4.284421218020973e-05, + "loss": 3.1198, + "step": 83800 + }, + { + "epoch": 2.15, + "learning_rate": 4.283567305393312e-05, + "loss": 3.261, + "step": 83900 + }, + { + "epoch": 2.15, + "learning_rate": 4.2827133927656524e-05, + "loss": 2.9281, + "step": 84000 + }, + { + "epoch": 2.15, + "learning_rate": 4.2818594801379925e-05, + "loss": 3.0939, + "step": 84100 + }, + { + "epoch": 2.16, + "learning_rate": 4.281005567510333e-05, + "loss": 3.0363, + "step": 84200 + }, + { + "epoch": 2.16, + "learning_rate": 4.280151654882673e-05, + "loss": 3.068, + "step": 84300 + }, + { + "epoch": 2.16, + "learning_rate": 4.279297742255013e-05, + "loss": 3.129, + "step": 84400 + }, + { + "epoch": 2.16, + "learning_rate": 4.2784438296273525e-05, + "loss": 3.2133, + "step": 84500 + }, + { + "epoch": 2.17, + "learning_rate": 4.277589916999693e-05, + "loss": 3.2536, + "step": 84600 + }, + { + "epoch": 2.17, + "learning_rate": 4.276736004372033e-05, + "loss": 2.9967, + "step": 84700 + }, + { + "epoch": 2.17, + "learning_rate": 4.275882091744373e-05, + "loss": 2.9866, + "step": 84800 + }, + { + "epoch": 2.17, + "learning_rate": 4.275028179116713e-05, + "loss": 3.1255, + "step": 84900 + }, + { + "epoch": 2.18, + "learning_rate": 4.274174266489053e-05, + "loss": 2.9926, + "step": 85000 + }, + { + "epoch": 2.18, + "learning_rate": 4.2733203538613934e-05, + "loss": 3.0661, + "step": 85100 + }, + { + "epoch": 2.18, + "learning_rate": 4.2724664412337335e-05, + "loss": 3.1502, + "step": 85200 + }, + { + "epoch": 2.19, + "learning_rate": 4.271612528606073e-05, + "loss": 2.8778, + "step": 85300 + }, + { + "epoch": 2.19, + "learning_rate": 4.270758615978414e-05, + "loss": 3.2147, + "step": 85400 + }, + { + "epoch": 2.19, + "learning_rate": 4.269904703350753e-05, + "loss": 2.9854, + "step": 85500 + }, + { + "epoch": 2.19, + "learning_rate": 4.2690507907230935e-05, + "loss": 3.1181, + "step": 85600 + }, + { + "epoch": 2.2, + "learning_rate": 4.2681968780954336e-05, + "loss": 2.9929, + "step": 85700 + }, + { + "epoch": 2.2, + "learning_rate": 4.267342965467774e-05, + "loss": 3.1671, + "step": 85800 + }, + { + "epoch": 2.2, + "learning_rate": 4.266489052840113e-05, + "loss": 3.0857, + "step": 85900 + }, + { + "epoch": 2.2, + "learning_rate": 4.265635140212454e-05, + "loss": 3.1893, + "step": 86000 + }, + { + "epoch": 2.21, + "learning_rate": 4.2647812275847936e-05, + "loss": 3.1835, + "step": 86100 + }, + { + "epoch": 2.21, + "learning_rate": 4.263927314957134e-05, + "loss": 3.3165, + "step": 86200 + }, + { + "epoch": 2.21, + "learning_rate": 4.263073402329474e-05, + "loss": 3.0212, + "step": 86300 + }, + { + "epoch": 2.21, + "learning_rate": 4.262219489701814e-05, + "loss": 3.0893, + "step": 86400 + }, + { + "epoch": 2.22, + "learning_rate": 4.261365577074154e-05, + "loss": 3.0834, + "step": 86500 + }, + { + "epoch": 2.22, + "learning_rate": 4.2605116644464944e-05, + "loss": 3.0804, + "step": 86600 + }, + { + "epoch": 2.22, + "learning_rate": 4.259657751818834e-05, + "loss": 3.133, + "step": 86700 + }, + { + "epoch": 2.22, + "learning_rate": 4.258803839191175e-05, + "loss": 3.3209, + "step": 86800 + }, + { + "epoch": 2.23, + "learning_rate": 4.257949926563514e-05, + "loss": 3.1159, + "step": 86900 + }, + { + "epoch": 2.23, + "learning_rate": 4.257096013935854e-05, + "loss": 3.0212, + "step": 87000 + }, + { + "epoch": 2.23, + "learning_rate": 4.2562421013081945e-05, + "loss": 3.0374, + "step": 87100 + }, + { + "epoch": 2.23, + "learning_rate": 4.2553881886805346e-05, + "loss": 2.8671, + "step": 87200 + }, + { + "epoch": 2.24, + "learning_rate": 4.254534276052875e-05, + "loss": 3.211, + "step": 87300 + }, + { + "epoch": 2.24, + "learning_rate": 4.253680363425215e-05, + "loss": 3.2994, + "step": 87400 + }, + { + "epoch": 2.24, + "learning_rate": 4.2528264507975544e-05, + "loss": 3.1808, + "step": 87500 + }, + { + "epoch": 2.24, + "learning_rate": 4.251972538169895e-05, + "loss": 3.0728, + "step": 87600 + }, + { + "epoch": 2.25, + "learning_rate": 4.251118625542235e-05, + "loss": 3.1494, + "step": 87700 + }, + { + "epoch": 2.25, + "learning_rate": 4.250264712914574e-05, + "loss": 3.0919, + "step": 87800 + }, + { + "epoch": 2.25, + "learning_rate": 4.249410800286915e-05, + "loss": 3.1287, + "step": 87900 + }, + { + "epoch": 2.25, + "learning_rate": 4.2485568876592545e-05, + "loss": 3.0724, + "step": 88000 + }, + { + "epoch": 2.26, + "learning_rate": 4.2477029750315946e-05, + "loss": 3.1844, + "step": 88100 + }, + { + "epoch": 2.26, + "learning_rate": 4.246849062403935e-05, + "loss": 2.9444, + "step": 88200 + }, + { + "epoch": 2.26, + "learning_rate": 4.245995149776275e-05, + "loss": 3.2252, + "step": 88300 + }, + { + "epoch": 2.26, + "learning_rate": 4.245141237148615e-05, + "loss": 2.9772, + "step": 88400 + }, + { + "epoch": 2.27, + "learning_rate": 4.244287324520955e-05, + "loss": 3.1618, + "step": 88500 + }, + { + "epoch": 2.27, + "learning_rate": 4.243433411893295e-05, + "loss": 3.2035, + "step": 88600 + }, + { + "epoch": 2.27, + "learning_rate": 4.2425794992656356e-05, + "loss": 3.03, + "step": 88700 + }, + { + "epoch": 2.27, + "learning_rate": 4.241725586637975e-05, + "loss": 3.0816, + "step": 88800 + }, + { + "epoch": 2.28, + "learning_rate": 4.240871674010315e-05, + "loss": 3.0276, + "step": 88900 + }, + { + "epoch": 2.28, + "learning_rate": 4.2400177613826554e-05, + "loss": 3.1817, + "step": 89000 + }, + { + "epoch": 2.28, + "learning_rate": 4.2391638487549955e-05, + "loss": 2.8854, + "step": 89100 + }, + { + "epoch": 2.29, + "learning_rate": 4.238309936127336e-05, + "loss": 2.99, + "step": 89200 + }, + { + "epoch": 2.29, + "learning_rate": 4.237456023499676e-05, + "loss": 3.2029, + "step": 89300 + }, + { + "epoch": 2.29, + "learning_rate": 4.236602110872015e-05, + "loss": 3.1559, + "step": 89400 + }, + { + "epoch": 2.29, + "learning_rate": 4.235748198244356e-05, + "loss": 3.1818, + "step": 89500 + }, + { + "epoch": 2.3, + "learning_rate": 4.2348942856166956e-05, + "loss": 3.1181, + "step": 89600 + }, + { + "epoch": 2.3, + "learning_rate": 4.234040372989036e-05, + "loss": 3.1893, + "step": 89700 + }, + { + "epoch": 2.3, + "learning_rate": 4.233186460361376e-05, + "loss": 3.1009, + "step": 89800 + }, + { + "epoch": 2.3, + "learning_rate": 4.232332547733716e-05, + "loss": 3.2049, + "step": 89900 + }, + { + "epoch": 2.31, + "learning_rate": 4.231478635106056e-05, + "loss": 3.0272, + "step": 90000 + }, + { + "epoch": 2.31, + "learning_rate": 4.2306247224783964e-05, + "loss": 3.2394, + "step": 90100 + }, + { + "epoch": 2.31, + "learning_rate": 4.229770809850736e-05, + "loss": 2.8205, + "step": 90200 + }, + { + "epoch": 2.31, + "learning_rate": 4.228916897223077e-05, + "loss": 3.2412, + "step": 90300 + }, + { + "epoch": 2.32, + "learning_rate": 4.228062984595416e-05, + "loss": 2.9937, + "step": 90400 + }, + { + "epoch": 2.32, + "learning_rate": 4.227209071967756e-05, + "loss": 3.2042, + "step": 90500 + }, + { + "epoch": 2.32, + "learning_rate": 4.2263551593400965e-05, + "loss": 3.0972, + "step": 90600 + }, + { + "epoch": 2.32, + "learning_rate": 4.2255012467124366e-05, + "loss": 3.2973, + "step": 90700 + }, + { + "epoch": 2.33, + "learning_rate": 4.224647334084776e-05, + "loss": 3.0852, + "step": 90800 + }, + { + "epoch": 2.33, + "learning_rate": 4.223793421457117e-05, + "loss": 3.0435, + "step": 90900 + }, + { + "epoch": 2.33, + "learning_rate": 4.2229395088294564e-05, + "loss": 3.1896, + "step": 91000 + }, + { + "epoch": 2.33, + "learning_rate": 4.2220855962017966e-05, + "loss": 3.2066, + "step": 91100 + }, + { + "epoch": 2.34, + "learning_rate": 4.221231683574137e-05, + "loss": 3.0411, + "step": 91200 + }, + { + "epoch": 2.34, + "learning_rate": 4.220377770946477e-05, + "loss": 3.1446, + "step": 91300 + }, + { + "epoch": 2.34, + "learning_rate": 4.219523858318817e-05, + "loss": 2.9866, + "step": 91400 + }, + { + "epoch": 2.34, + "learning_rate": 4.218669945691157e-05, + "loss": 2.9629, + "step": 91500 + }, + { + "epoch": 2.35, + "learning_rate": 4.217816033063497e-05, + "loss": 3.0371, + "step": 91600 + }, + { + "epoch": 2.35, + "learning_rate": 4.2169621204358375e-05, + "loss": 3.1809, + "step": 91700 + }, + { + "epoch": 2.35, + "learning_rate": 4.216108207808177e-05, + "loss": 3.2357, + "step": 91800 + }, + { + "epoch": 2.35, + "learning_rate": 4.215254295180517e-05, + "loss": 3.298, + "step": 91900 + }, + { + "epoch": 2.36, + "learning_rate": 4.214400382552857e-05, + "loss": 3.0594, + "step": 92000 + }, + { + "epoch": 2.36, + "learning_rate": 4.2135464699251974e-05, + "loss": 2.8645, + "step": 92100 + }, + { + "epoch": 2.36, + "learning_rate": 4.2126925572975376e-05, + "loss": 3.1693, + "step": 92200 + }, + { + "epoch": 2.36, + "learning_rate": 4.211838644669878e-05, + "loss": 3.0464, + "step": 92300 + }, + { + "epoch": 2.37, + "learning_rate": 4.210984732042217e-05, + "loss": 3.0673, + "step": 92400 + }, + { + "epoch": 2.37, + "learning_rate": 4.210130819414558e-05, + "loss": 2.959, + "step": 92500 + }, + { + "epoch": 2.37, + "learning_rate": 4.2092769067868975e-05, + "loss": 3.0101, + "step": 92600 + }, + { + "epoch": 2.37, + "learning_rate": 4.208422994159238e-05, + "loss": 3.13, + "step": 92700 + }, + { + "epoch": 2.38, + "learning_rate": 4.207569081531578e-05, + "loss": 3.0754, + "step": 92800 + }, + { + "epoch": 2.38, + "learning_rate": 4.206715168903918e-05, + "loss": 3.0374, + "step": 92900 + }, + { + "epoch": 2.38, + "learning_rate": 4.2058612562762575e-05, + "loss": 3.2083, + "step": 93000 + }, + { + "epoch": 2.38, + "learning_rate": 4.205007343648598e-05, + "loss": 2.9163, + "step": 93100 + }, + { + "epoch": 2.39, + "learning_rate": 4.204153431020938e-05, + "loss": 3.0933, + "step": 93200 + }, + { + "epoch": 2.39, + "learning_rate": 4.2032995183932786e-05, + "loss": 3.125, + "step": 93300 + }, + { + "epoch": 2.39, + "learning_rate": 4.202445605765618e-05, + "loss": 3.1055, + "step": 93400 + }, + { + "epoch": 2.4, + "learning_rate": 4.201591693137958e-05, + "loss": 3.0669, + "step": 93500 + }, + { + "epoch": 2.4, + "learning_rate": 4.2007377805102984e-05, + "loss": 3.0477, + "step": 93600 + }, + { + "epoch": 2.4, + "learning_rate": 4.1998838678826386e-05, + "loss": 3.1347, + "step": 93700 + }, + { + "epoch": 2.4, + "learning_rate": 4.199029955254978e-05, + "loss": 3.1189, + "step": 93800 + }, + { + "epoch": 2.41, + "learning_rate": 4.198176042627319e-05, + "loss": 3.2261, + "step": 93900 + }, + { + "epoch": 2.41, + "learning_rate": 4.1973221299996584e-05, + "loss": 3.0995, + "step": 94000 + }, + { + "epoch": 2.41, + "learning_rate": 4.1964682173719985e-05, + "loss": 3.0714, + "step": 94100 + }, + { + "epoch": 2.41, + "learning_rate": 4.195614304744339e-05, + "loss": 3.2601, + "step": 94200 + }, + { + "epoch": 2.42, + "learning_rate": 4.194760392116679e-05, + "loss": 3.1525, + "step": 94300 + }, + { + "epoch": 2.42, + "learning_rate": 4.193906479489019e-05, + "loss": 3.0394, + "step": 94400 + }, + { + "epoch": 2.42, + "learning_rate": 4.193052566861359e-05, + "loss": 3.2386, + "step": 94500 + }, + { + "epoch": 2.42, + "learning_rate": 4.1921986542336986e-05, + "loss": 3.239, + "step": 94600 + }, + { + "epoch": 2.43, + "learning_rate": 4.1913447416060394e-05, + "loss": 3.0516, + "step": 94700 + }, + { + "epoch": 2.43, + "learning_rate": 4.190490828978379e-05, + "loss": 3.0337, + "step": 94800 + }, + { + "epoch": 2.43, + "learning_rate": 4.189636916350719e-05, + "loss": 2.9627, + "step": 94900 + }, + { + "epoch": 2.43, + "learning_rate": 4.188783003723059e-05, + "loss": 3.0803, + "step": 95000 + }, + { + "epoch": 2.44, + "learning_rate": 4.1879290910953994e-05, + "loss": 2.9824, + "step": 95100 + }, + { + "epoch": 2.44, + "learning_rate": 4.1870751784677395e-05, + "loss": 3.1304, + "step": 95200 + }, + { + "epoch": 2.44, + "learning_rate": 4.18622126584008e-05, + "loss": 3.0516, + "step": 95300 + }, + { + "epoch": 2.44, + "learning_rate": 4.185367353212419e-05, + "loss": 2.9625, + "step": 95400 + }, + { + "epoch": 2.45, + "learning_rate": 4.18451344058476e-05, + "loss": 2.9388, + "step": 95500 + }, + { + "epoch": 2.45, + "learning_rate": 4.1836595279570995e-05, + "loss": 3.1363, + "step": 95600 + }, + { + "epoch": 2.45, + "learning_rate": 4.1828056153294396e-05, + "loss": 2.9823, + "step": 95700 + }, + { + "epoch": 2.45, + "learning_rate": 4.18195170270178e-05, + "loss": 3.108, + "step": 95800 + }, + { + "epoch": 2.46, + "learning_rate": 4.18109779007412e-05, + "loss": 3.1888, + "step": 95900 + }, + { + "epoch": 2.46, + "learning_rate": 4.1802438774464594e-05, + "loss": 3.1066, + "step": 96000 + }, + { + "epoch": 2.46, + "learning_rate": 4.1793899648188e-05, + "loss": 2.9461, + "step": 96100 + }, + { + "epoch": 2.46, + "learning_rate": 4.17853605219114e-05, + "loss": 3.0784, + "step": 96200 + }, + { + "epoch": 2.47, + "learning_rate": 4.17768213956348e-05, + "loss": 3.0233, + "step": 96300 + }, + { + "epoch": 2.47, + "learning_rate": 4.17682822693582e-05, + "loss": 3.0728, + "step": 96400 + }, + { + "epoch": 2.47, + "learning_rate": 4.17597431430816e-05, + "loss": 3.0897, + "step": 96500 + }, + { + "epoch": 2.47, + "learning_rate": 4.1751204016805004e-05, + "loss": 3.0342, + "step": 96600 + }, + { + "epoch": 2.48, + "learning_rate": 4.1742664890528405e-05, + "loss": 2.9922, + "step": 96700 + }, + { + "epoch": 2.48, + "learning_rate": 4.17341257642518e-05, + "loss": 3.0786, + "step": 96800 + }, + { + "epoch": 2.48, + "learning_rate": 4.172558663797521e-05, + "loss": 2.9265, + "step": 96900 + }, + { + "epoch": 2.48, + "learning_rate": 4.17170475116986e-05, + "loss": 3.109, + "step": 97000 + }, + { + "epoch": 2.49, + "learning_rate": 4.1708508385422004e-05, + "loss": 3.229, + "step": 97100 + }, + { + "epoch": 2.49, + "learning_rate": 4.1699969259145406e-05, + "loss": 2.9595, + "step": 97200 + }, + { + "epoch": 2.49, + "learning_rate": 4.169143013286881e-05, + "loss": 2.9587, + "step": 97300 + }, + { + "epoch": 2.5, + "learning_rate": 4.168289100659221e-05, + "loss": 3.0812, + "step": 97400 + }, + { + "epoch": 2.5, + "learning_rate": 4.167435188031561e-05, + "loss": 3.1367, + "step": 97500 + }, + { + "epoch": 2.5, + "learning_rate": 4.1665812754039005e-05, + "loss": 3.0375, + "step": 97600 + }, + { + "epoch": 2.5, + "learning_rate": 4.1657273627762414e-05, + "loss": 3.1682, + "step": 97700 + }, + { + "epoch": 2.51, + "learning_rate": 4.164873450148581e-05, + "loss": 3.0156, + "step": 97800 + }, + { + "epoch": 2.51, + "learning_rate": 4.164019537520921e-05, + "loss": 3.1416, + "step": 97900 + }, + { + "epoch": 2.51, + "learning_rate": 4.163165624893261e-05, + "loss": 3.1544, + "step": 98000 + }, + { + "epoch": 2.51, + "learning_rate": 4.162311712265601e-05, + "loss": 3.1956, + "step": 98100 + }, + { + "epoch": 2.52, + "learning_rate": 4.1614577996379415e-05, + "loss": 3.0813, + "step": 98200 + }, + { + "epoch": 2.52, + "learning_rate": 4.1606038870102816e-05, + "loss": 3.0339, + "step": 98300 + }, + { + "epoch": 2.52, + "learning_rate": 4.159749974382621e-05, + "loss": 3.0538, + "step": 98400 + }, + { + "epoch": 2.52, + "learning_rate": 4.158896061754962e-05, + "loss": 3.152, + "step": 98500 + }, + { + "epoch": 2.53, + "learning_rate": 4.1580421491273014e-05, + "loss": 3.1042, + "step": 98600 + }, + { + "epoch": 2.53, + "learning_rate": 4.1571882364996416e-05, + "loss": 2.8913, + "step": 98700 + }, + { + "epoch": 2.53, + "learning_rate": 4.156334323871982e-05, + "loss": 3.0919, + "step": 98800 + }, + { + "epoch": 2.53, + "learning_rate": 4.155480411244322e-05, + "loss": 3.0475, + "step": 98900 + }, + { + "epoch": 2.54, + "learning_rate": 4.1546264986166614e-05, + "loss": 2.9587, + "step": 99000 + }, + { + "epoch": 2.54, + "learning_rate": 4.153772585989002e-05, + "loss": 3.0237, + "step": 99100 + }, + { + "epoch": 2.54, + "learning_rate": 4.152918673361342e-05, + "loss": 2.9342, + "step": 99200 + }, + { + "epoch": 2.54, + "learning_rate": 4.152064760733682e-05, + "loss": 3.0741, + "step": 99300 + }, + { + "epoch": 2.55, + "learning_rate": 4.151210848106022e-05, + "loss": 3.3034, + "step": 99400 + }, + { + "epoch": 2.55, + "learning_rate": 4.150356935478362e-05, + "loss": 2.997, + "step": 99500 + }, + { + "epoch": 2.55, + "learning_rate": 4.149503022850702e-05, + "loss": 3.1982, + "step": 99600 + }, + { + "epoch": 2.55, + "learning_rate": 4.1486491102230424e-05, + "loss": 3.0697, + "step": 99700 + }, + { + "epoch": 2.56, + "learning_rate": 4.147795197595382e-05, + "loss": 3.111, + "step": 99800 + }, + { + "epoch": 2.56, + "learning_rate": 4.146941284967723e-05, + "loss": 3.1279, + "step": 99900 + }, + { + "epoch": 2.56, + "learning_rate": 4.146087372340062e-05, + "loss": 3.1502, + "step": 100000 + }, + { + "epoch": 2.56, + "learning_rate": 4.1452334597124024e-05, + "loss": 2.8996, + "step": 100100 + }, + { + "epoch": 2.57, + "learning_rate": 4.1443795470847425e-05, + "loss": 3.2088, + "step": 100200 + }, + { + "epoch": 2.57, + "learning_rate": 4.143525634457083e-05, + "loss": 3.1219, + "step": 100300 + }, + { + "epoch": 2.57, + "learning_rate": 4.142671721829423e-05, + "loss": 3.0994, + "step": 100400 + }, + { + "epoch": 2.57, + "learning_rate": 4.141817809201763e-05, + "loss": 3.076, + "step": 100500 + }, + { + "epoch": 2.58, + "learning_rate": 4.1409638965741025e-05, + "loss": 3.0914, + "step": 100600 + }, + { + "epoch": 2.58, + "learning_rate": 4.140109983946443e-05, + "loss": 2.9685, + "step": 100700 + }, + { + "epoch": 2.58, + "learning_rate": 4.139256071318783e-05, + "loss": 3.3519, + "step": 100800 + }, + { + "epoch": 2.58, + "learning_rate": 4.138402158691123e-05, + "loss": 3.0838, + "step": 100900 + }, + { + "epoch": 2.59, + "learning_rate": 4.137548246063463e-05, + "loss": 3.0116, + "step": 101000 + }, + { + "epoch": 2.59, + "learning_rate": 4.136694333435803e-05, + "loss": 2.9562, + "step": 101100 + }, + { + "epoch": 2.59, + "learning_rate": 4.135840420808143e-05, + "loss": 3.1637, + "step": 101200 + }, + { + "epoch": 2.6, + "learning_rate": 4.1349865081804836e-05, + "loss": 3.1111, + "step": 101300 + }, + { + "epoch": 2.6, + "learning_rate": 4.134132595552823e-05, + "loss": 2.9523, + "step": 101400 + }, + { + "epoch": 2.6, + "learning_rate": 4.133278682925163e-05, + "loss": 2.9791, + "step": 101500 + }, + { + "epoch": 2.6, + "learning_rate": 4.1324247702975033e-05, + "loss": 3.0496, + "step": 101600 + }, + { + "epoch": 2.61, + "learning_rate": 4.1315708576698435e-05, + "loss": 3.0005, + "step": 101700 + }, + { + "epoch": 2.61, + "learning_rate": 4.1307169450421837e-05, + "loss": 3.242, + "step": 101800 + }, + { + "epoch": 2.61, + "learning_rate": 4.129863032414524e-05, + "loss": 2.9777, + "step": 101900 + }, + { + "epoch": 2.61, + "learning_rate": 4.129009119786863e-05, + "loss": 3.0187, + "step": 102000 + }, + { + "epoch": 2.62, + "learning_rate": 4.128155207159204e-05, + "loss": 2.9164, + "step": 102100 + }, + { + "epoch": 2.62, + "learning_rate": 4.1273012945315436e-05, + "loss": 3.1537, + "step": 102200 + }, + { + "epoch": 2.62, + "learning_rate": 4.126447381903884e-05, + "loss": 3.0327, + "step": 102300 + }, + { + "epoch": 2.62, + "learning_rate": 4.125593469276224e-05, + "loss": 3.3472, + "step": 102400 + }, + { + "epoch": 2.63, + "learning_rate": 4.124739556648564e-05, + "loss": 3.0513, + "step": 102500 + }, + { + "epoch": 2.63, + "learning_rate": 4.123885644020904e-05, + "loss": 3.0413, + "step": 102600 + }, + { + "epoch": 2.63, + "learning_rate": 4.1230317313932444e-05, + "loss": 3.041, + "step": 102700 + }, + { + "epoch": 2.63, + "learning_rate": 4.122177818765584e-05, + "loss": 3.0476, + "step": 102800 + }, + { + "epoch": 2.64, + "learning_rate": 4.121323906137925e-05, + "loss": 3.0221, + "step": 102900 + }, + { + "epoch": 2.64, + "learning_rate": 4.120469993510264e-05, + "loss": 3.0639, + "step": 103000 + }, + { + "epoch": 2.64, + "learning_rate": 4.119616080882604e-05, + "loss": 3.1602, + "step": 103100 + }, + { + "epoch": 2.64, + "learning_rate": 4.1187621682549445e-05, + "loss": 2.9889, + "step": 103200 + }, + { + "epoch": 2.65, + "learning_rate": 4.1179082556272846e-05, + "loss": 3.0175, + "step": 103300 + }, + { + "epoch": 2.65, + "learning_rate": 4.117054342999625e-05, + "loss": 2.9935, + "step": 103400 + }, + { + "epoch": 2.65, + "learning_rate": 4.116200430371965e-05, + "loss": 3.168, + "step": 103500 + }, + { + "epoch": 2.65, + "learning_rate": 4.1153465177443044e-05, + "loss": 3.0671, + "step": 103600 + }, + { + "epoch": 2.66, + "learning_rate": 4.114492605116645e-05, + "loss": 3.1425, + "step": 103700 + }, + { + "epoch": 2.66, + "learning_rate": 4.113638692488985e-05, + "loss": 3.0317, + "step": 103800 + }, + { + "epoch": 2.66, + "learning_rate": 4.112784779861325e-05, + "loss": 3.0031, + "step": 103900 + }, + { + "epoch": 2.66, + "learning_rate": 4.111930867233665e-05, + "loss": 3.0538, + "step": 104000 + }, + { + "epoch": 2.67, + "learning_rate": 4.111076954606005e-05, + "loss": 2.9468, + "step": 104100 + }, + { + "epoch": 2.67, + "learning_rate": 4.110223041978345e-05, + "loss": 2.9273, + "step": 104200 + }, + { + "epoch": 2.67, + "learning_rate": 4.1093691293506855e-05, + "loss": 2.8708, + "step": 104300 + }, + { + "epoch": 2.67, + "learning_rate": 4.108515216723025e-05, + "loss": 3.0507, + "step": 104400 + }, + { + "epoch": 2.68, + "learning_rate": 4.107661304095365e-05, + "loss": 3.059, + "step": 104500 + }, + { + "epoch": 2.68, + "learning_rate": 4.106807391467705e-05, + "loss": 2.9688, + "step": 104600 + }, + { + "epoch": 2.68, + "learning_rate": 4.1059534788400454e-05, + "loss": 3.0545, + "step": 104700 + }, + { + "epoch": 2.68, + "learning_rate": 4.1050995662123856e-05, + "loss": 3.1936, + "step": 104800 + }, + { + "epoch": 2.69, + "learning_rate": 4.104245653584726e-05, + "loss": 3.158, + "step": 104900 + }, + { + "epoch": 2.69, + "learning_rate": 4.103391740957065e-05, + "loss": 3.0972, + "step": 105000 + }, + { + "epoch": 2.69, + "learning_rate": 4.102537828329406e-05, + "loss": 3.2179, + "step": 105100 + }, + { + "epoch": 2.69, + "learning_rate": 4.1016839157017455e-05, + "loss": 3.1017, + "step": 105200 + }, + { + "epoch": 2.7, + "learning_rate": 4.100830003074086e-05, + "loss": 3.0019, + "step": 105300 + }, + { + "epoch": 2.7, + "learning_rate": 4.099976090446426e-05, + "loss": 3.1353, + "step": 105400 + }, + { + "epoch": 2.7, + "learning_rate": 4.099122177818765e-05, + "loss": 3.1709, + "step": 105500 + }, + { + "epoch": 2.71, + "learning_rate": 4.098268265191106e-05, + "loss": 2.9705, + "step": 105600 + }, + { + "epoch": 2.71, + "learning_rate": 4.0974143525634456e-05, + "loss": 3.1053, + "step": 105700 + }, + { + "epoch": 2.71, + "learning_rate": 4.096560439935786e-05, + "loss": 3.0786, + "step": 105800 + }, + { + "epoch": 2.71, + "learning_rate": 4.095706527308126e-05, + "loss": 3.1098, + "step": 105900 + }, + { + "epoch": 2.72, + "learning_rate": 4.094852614680466e-05, + "loss": 2.8744, + "step": 106000 + }, + { + "epoch": 2.72, + "learning_rate": 4.0939987020528056e-05, + "loss": 2.9382, + "step": 106100 + }, + { + "epoch": 2.72, + "learning_rate": 4.0931447894251464e-05, + "loss": 2.9395, + "step": 106200 + }, + { + "epoch": 2.72, + "learning_rate": 4.092290876797486e-05, + "loss": 3.053, + "step": 106300 + }, + { + "epoch": 2.73, + "learning_rate": 4.091436964169826e-05, + "loss": 3.1599, + "step": 106400 + }, + { + "epoch": 2.73, + "learning_rate": 4.090583051542166e-05, + "loss": 3.0938, + "step": 106500 + }, + { + "epoch": 2.73, + "learning_rate": 4.0897291389145063e-05, + "loss": 2.9966, + "step": 106600 + }, + { + "epoch": 2.73, + "learning_rate": 4.0888752262868465e-05, + "loss": 3.1077, + "step": 106700 + }, + { + "epoch": 2.74, + "learning_rate": 4.0880213136591867e-05, + "loss": 3.1119, + "step": 106800 + }, + { + "epoch": 2.74, + "learning_rate": 4.087167401031526e-05, + "loss": 3.1429, + "step": 106900 + }, + { + "epoch": 2.74, + "learning_rate": 4.086313488403867e-05, + "loss": 3.0915, + "step": 107000 + }, + { + "epoch": 2.74, + "learning_rate": 4.0854595757762064e-05, + "loss": 3.324, + "step": 107100 + }, + { + "epoch": 2.75, + "learning_rate": 4.0846056631485466e-05, + "loss": 3.1729, + "step": 107200 + }, + { + "epoch": 2.75, + "learning_rate": 4.083751750520887e-05, + "loss": 2.9374, + "step": 107300 + }, + { + "epoch": 2.75, + "learning_rate": 4.082897837893227e-05, + "loss": 3.3158, + "step": 107400 + }, + { + "epoch": 2.75, + "learning_rate": 4.082043925265567e-05, + "loss": 3.0624, + "step": 107500 + }, + { + "epoch": 2.76, + "learning_rate": 4.081190012637907e-05, + "loss": 3.199, + "step": 107600 + }, + { + "epoch": 2.76, + "learning_rate": 4.080336100010247e-05, + "loss": 3.0148, + "step": 107700 + }, + { + "epoch": 2.76, + "learning_rate": 4.0794821873825875e-05, + "loss": 3.0849, + "step": 107800 + }, + { + "epoch": 2.76, + "learning_rate": 4.078628274754927e-05, + "loss": 2.9066, + "step": 107900 + }, + { + "epoch": 2.77, + "learning_rate": 4.077774362127267e-05, + "loss": 3.0127, + "step": 108000 + }, + { + "epoch": 2.77, + "learning_rate": 4.076920449499607e-05, + "loss": 2.9849, + "step": 108100 + }, + { + "epoch": 2.77, + "learning_rate": 4.0760665368719475e-05, + "loss": 2.8326, + "step": 108200 + }, + { + "epoch": 2.77, + "learning_rate": 4.0752126242442876e-05, + "loss": 3.1002, + "step": 108300 + }, + { + "epoch": 2.78, + "learning_rate": 4.074358711616628e-05, + "loss": 2.8001, + "step": 108400 + }, + { + "epoch": 2.78, + "learning_rate": 4.073504798988967e-05, + "loss": 2.9754, + "step": 108500 + }, + { + "epoch": 2.78, + "learning_rate": 4.072650886361308e-05, + "loss": 3.1234, + "step": 108600 + }, + { + "epoch": 2.78, + "learning_rate": 4.0717969737336476e-05, + "loss": 3.0748, + "step": 108700 + }, + { + "epoch": 2.79, + "learning_rate": 4.070943061105988e-05, + "loss": 3.0435, + "step": 108800 + }, + { + "epoch": 2.79, + "learning_rate": 4.070089148478328e-05, + "loss": 3.0546, + "step": 108900 + }, + { + "epoch": 2.79, + "learning_rate": 4.069235235850668e-05, + "loss": 3.2162, + "step": 109000 + }, + { + "epoch": 2.79, + "learning_rate": 4.0683813232230075e-05, + "loss": 2.8097, + "step": 109100 + }, + { + "epoch": 2.8, + "learning_rate": 4.067527410595348e-05, + "loss": 2.949, + "step": 109200 + }, + { + "epoch": 2.8, + "learning_rate": 4.066673497967688e-05, + "loss": 3.0555, + "step": 109300 + }, + { + "epoch": 2.8, + "learning_rate": 4.065819585340028e-05, + "loss": 2.9504, + "step": 109400 + }, + { + "epoch": 2.81, + "learning_rate": 4.064965672712368e-05, + "loss": 3.1909, + "step": 109500 + }, + { + "epoch": 2.81, + "learning_rate": 4.064111760084708e-05, + "loss": 2.7929, + "step": 109600 + }, + { + "epoch": 2.81, + "learning_rate": 4.0632578474570484e-05, + "loss": 3.0875, + "step": 109700 + }, + { + "epoch": 2.81, + "learning_rate": 4.0624039348293886e-05, + "loss": 3.0052, + "step": 109800 + }, + { + "epoch": 2.82, + "learning_rate": 4.061550022201728e-05, + "loss": 2.9049, + "step": 109900 + }, + { + "epoch": 2.82, + "learning_rate": 4.060696109574069e-05, + "loss": 3.0253, + "step": 110000 + }, + { + "epoch": 2.82, + "learning_rate": 4.0598421969464084e-05, + "loss": 3.1229, + "step": 110100 + }, + { + "epoch": 2.82, + "learning_rate": 4.0589882843187485e-05, + "loss": 2.993, + "step": 110200 + }, + { + "epoch": 2.83, + "learning_rate": 4.058134371691089e-05, + "loss": 3.04, + "step": 110300 + }, + { + "epoch": 2.83, + "learning_rate": 4.057280459063429e-05, + "loss": 3.0167, + "step": 110400 + }, + { + "epoch": 2.83, + "learning_rate": 4.056426546435769e-05, + "loss": 3.3009, + "step": 110500 + }, + { + "epoch": 2.83, + "learning_rate": 4.055572633808109e-05, + "loss": 3.1617, + "step": 110600 + }, + { + "epoch": 2.84, + "learning_rate": 4.0547187211804486e-05, + "loss": 3.1479, + "step": 110700 + }, + { + "epoch": 2.84, + "learning_rate": 4.0538648085527895e-05, + "loss": 3.1471, + "step": 110800 + }, + { + "epoch": 2.84, + "learning_rate": 4.053010895925129e-05, + "loss": 3.046, + "step": 110900 + }, + { + "epoch": 2.84, + "learning_rate": 4.052156983297469e-05, + "loss": 3.1228, + "step": 111000 + }, + { + "epoch": 2.85, + "learning_rate": 4.051303070669809e-05, + "loss": 2.9721, + "step": 111100 + }, + { + "epoch": 2.85, + "learning_rate": 4.0504491580421494e-05, + "loss": 2.9042, + "step": 111200 + }, + { + "epoch": 2.85, + "learning_rate": 4.049595245414489e-05, + "loss": 3.1024, + "step": 111300 + }, + { + "epoch": 2.85, + "learning_rate": 4.04874133278683e-05, + "loss": 3.2672, + "step": 111400 + }, + { + "epoch": 2.86, + "learning_rate": 4.047887420159169e-05, + "loss": 3.0472, + "step": 111500 + }, + { + "epoch": 2.86, + "learning_rate": 4.0470335075315093e-05, + "loss": 3.0819, + "step": 111600 + }, + { + "epoch": 2.86, + "learning_rate": 4.0461795949038495e-05, + "loss": 3.0193, + "step": 111700 + }, + { + "epoch": 2.86, + "learning_rate": 4.0453256822761897e-05, + "loss": 2.9185, + "step": 111800 + }, + { + "epoch": 2.87, + "learning_rate": 4.04447176964853e-05, + "loss": 3.3109, + "step": 111900 + }, + { + "epoch": 2.87, + "learning_rate": 4.04361785702087e-05, + "loss": 2.9213, + "step": 112000 + }, + { + "epoch": 2.87, + "learning_rate": 4.0427639443932094e-05, + "loss": 2.9948, + "step": 112100 + }, + { + "epoch": 2.87, + "learning_rate": 4.04191003176555e-05, + "loss": 2.936, + "step": 112200 + }, + { + "epoch": 2.88, + "learning_rate": 4.04105611913789e-05, + "loss": 3.2933, + "step": 112300 + }, + { + "epoch": 2.88, + "learning_rate": 4.04020220651023e-05, + "loss": 3.0799, + "step": 112400 + }, + { + "epoch": 2.88, + "learning_rate": 4.03934829388257e-05, + "loss": 3.0602, + "step": 112500 + }, + { + "epoch": 2.88, + "learning_rate": 4.03849438125491e-05, + "loss": 3.1894, + "step": 112600 + }, + { + "epoch": 2.89, + "learning_rate": 4.0376404686272504e-05, + "loss": 2.8404, + "step": 112700 + }, + { + "epoch": 2.89, + "learning_rate": 4.0367865559995905e-05, + "loss": 2.8567, + "step": 112800 + }, + { + "epoch": 2.89, + "learning_rate": 4.03593264337193e-05, + "loss": 2.9545, + "step": 112900 + }, + { + "epoch": 2.89, + "learning_rate": 4.035078730744271e-05, + "loss": 3.1068, + "step": 113000 + }, + { + "epoch": 2.9, + "learning_rate": 4.03422481811661e-05, + "loss": 3.0017, + "step": 113100 + }, + { + "epoch": 2.9, + "learning_rate": 4.0333709054889505e-05, + "loss": 2.9809, + "step": 113200 + }, + { + "epoch": 2.9, + "learning_rate": 4.0325169928612906e-05, + "loss": 2.9555, + "step": 113300 + }, + { + "epoch": 2.91, + "learning_rate": 4.031663080233631e-05, + "loss": 2.7379, + "step": 113400 + }, + { + "epoch": 2.91, + "learning_rate": 4.030809167605971e-05, + "loss": 2.882, + "step": 113500 + }, + { + "epoch": 2.91, + "learning_rate": 4.029955254978311e-05, + "loss": 2.8831, + "step": 113600 + }, + { + "epoch": 2.91, + "learning_rate": 4.0291013423506506e-05, + "loss": 3.051, + "step": 113700 + }, + { + "epoch": 2.92, + "learning_rate": 4.0282474297229914e-05, + "loss": 3.0809, + "step": 113800 + }, + { + "epoch": 2.92, + "learning_rate": 4.027393517095331e-05, + "loss": 2.9474, + "step": 113900 + }, + { + "epoch": 2.92, + "learning_rate": 4.026539604467671e-05, + "loss": 3.083, + "step": 114000 + }, + { + "epoch": 2.92, + "learning_rate": 4.025685691840011e-05, + "loss": 3.0755, + "step": 114100 + }, + { + "epoch": 2.93, + "learning_rate": 4.024831779212351e-05, + "loss": 3.2383, + "step": 114200 + }, + { + "epoch": 2.93, + "learning_rate": 4.023977866584691e-05, + "loss": 2.9592, + "step": 114300 + }, + { + "epoch": 2.93, + "learning_rate": 4.0231239539570316e-05, + "loss": 2.841, + "step": 114400 + }, + { + "epoch": 2.93, + "learning_rate": 4.022270041329371e-05, + "loss": 2.9557, + "step": 114500 + }, + { + "epoch": 2.94, + "learning_rate": 4.021416128701711e-05, + "loss": 3.0037, + "step": 114600 + }, + { + "epoch": 2.94, + "learning_rate": 4.0205622160740514e-05, + "loss": 2.8829, + "step": 114700 + }, + { + "epoch": 2.94, + "learning_rate": 4.0197083034463916e-05, + "loss": 3.0496, + "step": 114800 + }, + { + "epoch": 2.94, + "learning_rate": 4.018854390818732e-05, + "loss": 3.0789, + "step": 114900 + }, + { + "epoch": 2.95, + "learning_rate": 4.018000478191072e-05, + "loss": 2.9696, + "step": 115000 + }, + { + "epoch": 2.95, + "learning_rate": 4.0171465655634114e-05, + "loss": 2.9391, + "step": 115100 + }, + { + "epoch": 2.95, + "learning_rate": 4.016292652935752e-05, + "loss": 3.196, + "step": 115200 + }, + { + "epoch": 2.95, + "learning_rate": 4.015438740308092e-05, + "loss": 2.8523, + "step": 115300 + }, + { + "epoch": 2.96, + "learning_rate": 4.014584827680432e-05, + "loss": 3.1546, + "step": 115400 + }, + { + "epoch": 2.96, + "learning_rate": 4.013730915052772e-05, + "loss": 2.8606, + "step": 115500 + }, + { + "epoch": 2.96, + "learning_rate": 4.012877002425112e-05, + "loss": 3.0395, + "step": 115600 + }, + { + "epoch": 2.96, + "learning_rate": 4.012023089797452e-05, + "loss": 2.8805, + "step": 115700 + }, + { + "epoch": 2.97, + "learning_rate": 4.0111691771697925e-05, + "loss": 3.0013, + "step": 115800 + }, + { + "epoch": 2.97, + "learning_rate": 4.010315264542132e-05, + "loss": 3.0258, + "step": 115900 + }, + { + "epoch": 2.97, + "learning_rate": 4.009461351914473e-05, + "loss": 2.9218, + "step": 116000 + }, + { + "epoch": 2.97, + "learning_rate": 4.008607439286812e-05, + "loss": 3.0788, + "step": 116100 + }, + { + "epoch": 2.98, + "learning_rate": 4.0077535266591524e-05, + "loss": 3.0068, + "step": 116200 + }, + { + "epoch": 2.98, + "learning_rate": 4.0068996140314926e-05, + "loss": 3.1724, + "step": 116300 + }, + { + "epoch": 2.98, + "learning_rate": 4.006045701403833e-05, + "loss": 3.103, + "step": 116400 + }, + { + "epoch": 2.98, + "learning_rate": 4.005191788776172e-05, + "loss": 3.1552, + "step": 116500 + }, + { + "epoch": 2.99, + "learning_rate": 4.004337876148513e-05, + "loss": 3.036, + "step": 116600 + }, + { + "epoch": 2.99, + "learning_rate": 4.0034839635208525e-05, + "loss": 3.2177, + "step": 116700 + }, + { + "epoch": 2.99, + "learning_rate": 4.0026300508931927e-05, + "loss": 3.1162, + "step": 116800 + }, + { + "epoch": 2.99, + "learning_rate": 4.001776138265533e-05, + "loss": 3.0141, + "step": 116900 + }, + { + "epoch": 3.0, + "learning_rate": 4.000922225637873e-05, + "loss": 2.9341, + "step": 117000 + }, + { + "epoch": 3.0, + "learning_rate": 4.000068313010213e-05, + "loss": 2.9165, + "step": 117100 + }, + { + "epoch": 3.0, + "learning_rate": 3.999214400382553e-05, + "loss": 3.046, + "step": 117200 + }, + { + "epoch": 3.0, + "learning_rate": 3.998360487754893e-05, + "loss": 2.8234, + "step": 117300 + }, + { + "epoch": 3.01, + "learning_rate": 3.9975065751272336e-05, + "loss": 3.1521, + "step": 117400 + }, + { + "epoch": 3.01, + "learning_rate": 3.996652662499573e-05, + "loss": 3.014, + "step": 117500 + }, + { + "epoch": 3.01, + "learning_rate": 3.995798749871913e-05, + "loss": 3.1095, + "step": 117600 + }, + { + "epoch": 3.02, + "learning_rate": 3.9949448372442534e-05, + "loss": 3.0437, + "step": 117700 + }, + { + "epoch": 3.02, + "learning_rate": 3.9940909246165935e-05, + "loss": 2.9899, + "step": 117800 + }, + { + "epoch": 3.02, + "learning_rate": 3.993237011988934e-05, + "loss": 2.9003, + "step": 117900 + }, + { + "epoch": 3.02, + "learning_rate": 3.992383099361274e-05, + "loss": 3.0027, + "step": 118000 + }, + { + "epoch": 3.03, + "learning_rate": 3.991529186733613e-05, + "loss": 3.1018, + "step": 118100 + }, + { + "epoch": 3.03, + "learning_rate": 3.990675274105954e-05, + "loss": 2.9672, + "step": 118200 + }, + { + "epoch": 3.03, + "learning_rate": 3.9898213614782936e-05, + "loss": 2.9003, + "step": 118300 + }, + { + "epoch": 3.03, + "learning_rate": 3.988967448850634e-05, + "loss": 2.8893, + "step": 118400 + }, + { + "epoch": 3.04, + "learning_rate": 3.988113536222974e-05, + "loss": 3.148, + "step": 118500 + }, + { + "epoch": 3.04, + "learning_rate": 3.987259623595314e-05, + "loss": 2.9243, + "step": 118600 + }, + { + "epoch": 3.04, + "learning_rate": 3.986405710967654e-05, + "loss": 2.9773, + "step": 118700 + }, + { + "epoch": 3.04, + "learning_rate": 3.9855517983399944e-05, + "loss": 2.8417, + "step": 118800 + }, + { + "epoch": 3.05, + "learning_rate": 3.984697885712334e-05, + "loss": 3.0683, + "step": 118900 + }, + { + "epoch": 3.05, + "learning_rate": 3.983843973084675e-05, + "loss": 2.9911, + "step": 119000 + }, + { + "epoch": 3.05, + "learning_rate": 3.982990060457014e-05, + "loss": 2.8986, + "step": 119100 + }, + { + "epoch": 3.05, + "learning_rate": 3.982136147829354e-05, + "loss": 3.2045, + "step": 119200 + }, + { + "epoch": 3.06, + "learning_rate": 3.9812822352016945e-05, + "loss": 2.8607, + "step": 119300 + }, + { + "epoch": 3.06, + "learning_rate": 3.9804283225740346e-05, + "loss": 2.9648, + "step": 119400 + }, + { + "epoch": 3.06, + "learning_rate": 3.979574409946374e-05, + "loss": 2.9939, + "step": 119500 + }, + { + "epoch": 3.06, + "learning_rate": 3.978720497318715e-05, + "loss": 3.1878, + "step": 119600 + }, + { + "epoch": 3.07, + "learning_rate": 3.9778665846910544e-05, + "loss": 2.9334, + "step": 119700 + }, + { + "epoch": 3.07, + "learning_rate": 3.9770126720633946e-05, + "loss": 2.9575, + "step": 119800 + }, + { + "epoch": 3.07, + "learning_rate": 3.976158759435735e-05, + "loss": 2.8912, + "step": 119900 + }, + { + "epoch": 3.07, + "learning_rate": 3.975304846808075e-05, + "loss": 2.7881, + "step": 120000 + }, + { + "epoch": 3.08, + "learning_rate": 3.974450934180415e-05, + "loss": 3.0064, + "step": 120100 + }, + { + "epoch": 3.08, + "learning_rate": 3.973597021552755e-05, + "loss": 2.963, + "step": 120200 + }, + { + "epoch": 3.08, + "learning_rate": 3.972743108925095e-05, + "loss": 3.2183, + "step": 120300 + }, + { + "epoch": 3.08, + "learning_rate": 3.9718891962974355e-05, + "loss": 2.8965, + "step": 120400 + }, + { + "epoch": 3.09, + "learning_rate": 3.971035283669775e-05, + "loss": 2.8146, + "step": 120500 + }, + { + "epoch": 3.09, + "learning_rate": 3.970181371042115e-05, + "loss": 2.9657, + "step": 120600 + }, + { + "epoch": 3.09, + "learning_rate": 3.969327458414455e-05, + "loss": 3.0492, + "step": 120700 + }, + { + "epoch": 3.09, + "learning_rate": 3.9684735457867955e-05, + "loss": 3.0475, + "step": 120800 + }, + { + "epoch": 3.1, + "learning_rate": 3.9676196331591356e-05, + "loss": 2.8418, + "step": 120900 + }, + { + "epoch": 3.1, + "learning_rate": 3.966765720531476e-05, + "loss": 2.9023, + "step": 121000 + }, + { + "epoch": 3.1, + "learning_rate": 3.965911807903815e-05, + "loss": 2.9436, + "step": 121100 + }, + { + "epoch": 3.1, + "learning_rate": 3.965057895276156e-05, + "loss": 3.025, + "step": 121200 + }, + { + "epoch": 3.11, + "learning_rate": 3.9642039826484956e-05, + "loss": 3.0259, + "step": 121300 + }, + { + "epoch": 3.11, + "learning_rate": 3.963350070020836e-05, + "loss": 3.0025, + "step": 121400 + }, + { + "epoch": 3.11, + "learning_rate": 3.962496157393176e-05, + "loss": 2.8463, + "step": 121500 + }, + { + "epoch": 3.12, + "learning_rate": 3.961642244765516e-05, + "loss": 3.0121, + "step": 121600 + }, + { + "epoch": 3.12, + "learning_rate": 3.9607883321378555e-05, + "loss": 2.9603, + "step": 121700 + }, + { + "epoch": 3.12, + "learning_rate": 3.959934419510196e-05, + "loss": 2.9895, + "step": 121800 + }, + { + "epoch": 3.12, + "learning_rate": 3.959080506882536e-05, + "loss": 2.9055, + "step": 121900 + }, + { + "epoch": 3.13, + "learning_rate": 3.9582265942548766e-05, + "loss": 2.9311, + "step": 122000 + }, + { + "epoch": 3.13, + "learning_rate": 3.957372681627216e-05, + "loss": 2.9778, + "step": 122100 + }, + { + "epoch": 3.13, + "learning_rate": 3.956518768999556e-05, + "loss": 2.9807, + "step": 122200 + }, + { + "epoch": 3.13, + "learning_rate": 3.9556648563718964e-05, + "loss": 3.1473, + "step": 122300 + }, + { + "epoch": 3.14, + "learning_rate": 3.9548109437442366e-05, + "loss": 3.1106, + "step": 122400 + }, + { + "epoch": 3.14, + "learning_rate": 3.953957031116576e-05, + "loss": 2.8529, + "step": 122500 + }, + { + "epoch": 3.14, + "learning_rate": 3.953103118488917e-05, + "loss": 3.0184, + "step": 122600 + }, + { + "epoch": 3.14, + "learning_rate": 3.9522492058612564e-05, + "loss": 3.0022, + "step": 122700 + }, + { + "epoch": 3.15, + "learning_rate": 3.9513952932335965e-05, + "loss": 3.1624, + "step": 122800 + }, + { + "epoch": 3.15, + "learning_rate": 3.950541380605937e-05, + "loss": 2.8583, + "step": 122900 + }, + { + "epoch": 3.15, + "learning_rate": 3.949687467978276e-05, + "loss": 2.9685, + "step": 123000 + }, + { + "epoch": 3.15, + "learning_rate": 3.948833555350617e-05, + "loss": 3.1809, + "step": 123100 + }, + { + "epoch": 3.16, + "learning_rate": 3.9479796427229565e-05, + "loss": 3.116, + "step": 123200 + }, + { + "epoch": 3.16, + "learning_rate": 3.9471257300952966e-05, + "loss": 3.0942, + "step": 123300 + }, + { + "epoch": 3.16, + "learning_rate": 3.946271817467637e-05, + "loss": 3.0704, + "step": 123400 + }, + { + "epoch": 3.16, + "learning_rate": 3.945417904839977e-05, + "loss": 3.0432, + "step": 123500 + }, + { + "epoch": 3.17, + "learning_rate": 3.944563992212317e-05, + "loss": 2.9057, + "step": 123600 + }, + { + "epoch": 3.17, + "learning_rate": 3.943710079584657e-05, + "loss": 2.9233, + "step": 123700 + }, + { + "epoch": 3.17, + "learning_rate": 3.942856166956997e-05, + "loss": 2.9568, + "step": 123800 + }, + { + "epoch": 3.17, + "learning_rate": 3.9420022543293375e-05, + "loss": 2.9892, + "step": 123900 + }, + { + "epoch": 3.18, + "learning_rate": 3.941148341701677e-05, + "loss": 3.1241, + "step": 124000 + }, + { + "epoch": 3.18, + "learning_rate": 3.940294429074017e-05, + "loss": 3.1531, + "step": 124100 + }, + { + "epoch": 3.18, + "learning_rate": 3.939440516446357e-05, + "loss": 2.8468, + "step": 124200 + }, + { + "epoch": 3.18, + "learning_rate": 3.9385866038186975e-05, + "loss": 3.1295, + "step": 124300 + }, + { + "epoch": 3.19, + "learning_rate": 3.937732691191037e-05, + "loss": 2.959, + "step": 124400 + }, + { + "epoch": 3.19, + "learning_rate": 3.936878778563378e-05, + "loss": 2.9155, + "step": 124500 + }, + { + "epoch": 3.19, + "learning_rate": 3.936024865935717e-05, + "loss": 3.0969, + "step": 124600 + }, + { + "epoch": 3.19, + "learning_rate": 3.9351709533080574e-05, + "loss": 2.978, + "step": 124700 + }, + { + "epoch": 3.2, + "learning_rate": 3.9343170406803976e-05, + "loss": 3.0562, + "step": 124800 + }, + { + "epoch": 3.2, + "learning_rate": 3.933463128052738e-05, + "loss": 2.9286, + "step": 124900 + }, + { + "epoch": 3.2, + "learning_rate": 3.932609215425078e-05, + "loss": 2.9911, + "step": 125000 + }, + { + "epoch": 3.2, + "learning_rate": 3.931755302797418e-05, + "loss": 2.9229, + "step": 125100 + }, + { + "epoch": 3.21, + "learning_rate": 3.9309013901697575e-05, + "loss": 3.0515, + "step": 125200 + }, + { + "epoch": 3.21, + "learning_rate": 3.9300474775420984e-05, + "loss": 3.0484, + "step": 125300 + }, + { + "epoch": 3.21, + "learning_rate": 3.929193564914438e-05, + "loss": 2.9142, + "step": 125400 + }, + { + "epoch": 3.21, + "learning_rate": 3.928339652286778e-05, + "loss": 3.0388, + "step": 125500 + }, + { + "epoch": 3.22, + "learning_rate": 3.927485739659118e-05, + "loss": 2.9048, + "step": 125600 + }, + { + "epoch": 3.22, + "learning_rate": 3.926631827031458e-05, + "loss": 2.8863, + "step": 125700 + }, + { + "epoch": 3.22, + "learning_rate": 3.9257779144037985e-05, + "loss": 2.9024, + "step": 125800 + }, + { + "epoch": 3.23, + "learning_rate": 3.9249240017761386e-05, + "loss": 2.828, + "step": 125900 + }, + { + "epoch": 3.23, + "learning_rate": 3.924070089148478e-05, + "loss": 2.8643, + "step": 126000 + }, + { + "epoch": 3.23, + "learning_rate": 3.923216176520819e-05, + "loss": 3.0604, + "step": 126100 + }, + { + "epoch": 3.23, + "learning_rate": 3.9223622638931584e-05, + "loss": 3.0527, + "step": 126200 + }, + { + "epoch": 3.24, + "learning_rate": 3.9215083512654985e-05, + "loss": 3.0711, + "step": 126300 + }, + { + "epoch": 3.24, + "learning_rate": 3.920654438637839e-05, + "loss": 2.9576, + "step": 126400 + }, + { + "epoch": 3.24, + "learning_rate": 3.919800526010179e-05, + "loss": 2.8507, + "step": 126500 + }, + { + "epoch": 3.24, + "learning_rate": 3.918946613382518e-05, + "loss": 2.9436, + "step": 126600 + }, + { + "epoch": 3.25, + "learning_rate": 3.918092700754859e-05, + "loss": 3.0334, + "step": 126700 + }, + { + "epoch": 3.25, + "learning_rate": 3.9172387881271986e-05, + "loss": 3.071, + "step": 126800 + }, + { + "epoch": 3.25, + "learning_rate": 3.9163848754995395e-05, + "loss": 2.9778, + "step": 126900 + }, + { + "epoch": 3.25, + "learning_rate": 3.915530962871879e-05, + "loss": 3.2001, + "step": 127000 + }, + { + "epoch": 3.26, + "learning_rate": 3.914677050244219e-05, + "loss": 3.0323, + "step": 127100 + }, + { + "epoch": 3.26, + "learning_rate": 3.913823137616559e-05, + "loss": 2.9486, + "step": 127200 + }, + { + "epoch": 3.26, + "learning_rate": 3.9129692249888994e-05, + "loss": 2.965, + "step": 127300 + }, + { + "epoch": 3.26, + "learning_rate": 3.912115312361239e-05, + "loss": 2.7533, + "step": 127400 + }, + { + "epoch": 3.27, + "learning_rate": 3.91126139973358e-05, + "loss": 2.985, + "step": 127500 + }, + { + "epoch": 3.27, + "learning_rate": 3.910407487105919e-05, + "loss": 2.9597, + "step": 127600 + }, + { + "epoch": 3.27, + "learning_rate": 3.9095535744782594e-05, + "loss": 3.1081, + "step": 127700 + }, + { + "epoch": 3.27, + "learning_rate": 3.9086996618505995e-05, + "loss": 2.8903, + "step": 127800 + }, + { + "epoch": 3.28, + "learning_rate": 3.90784574922294e-05, + "loss": 2.6809, + "step": 127900 + }, + { + "epoch": 3.28, + "learning_rate": 3.90699183659528e-05, + "loss": 3.1497, + "step": 128000 + }, + { + "epoch": 3.28, + "learning_rate": 3.90613792396762e-05, + "loss": 3.027, + "step": 128100 + }, + { + "epoch": 3.28, + "learning_rate": 3.9052840113399595e-05, + "loss": 3.0816, + "step": 128200 + }, + { + "epoch": 3.29, + "learning_rate": 3.9044300987123e-05, + "loss": 2.9045, + "step": 128300 + }, + { + "epoch": 3.29, + "learning_rate": 3.90357618608464e-05, + "loss": 3.1785, + "step": 128400 + }, + { + "epoch": 3.29, + "learning_rate": 3.90272227345698e-05, + "loss": 2.9222, + "step": 128500 + }, + { + "epoch": 3.29, + "learning_rate": 3.90186836082932e-05, + "loss": 3.013, + "step": 128600 + }, + { + "epoch": 3.3, + "learning_rate": 3.90101444820166e-05, + "loss": 2.863, + "step": 128700 + }, + { + "epoch": 3.3, + "learning_rate": 3.9001605355740004e-05, + "loss": 3.0084, + "step": 128800 + }, + { + "epoch": 3.3, + "learning_rate": 3.8993066229463405e-05, + "loss": 2.9316, + "step": 128900 + }, + { + "epoch": 3.3, + "learning_rate": 3.89845271031868e-05, + "loss": 2.8692, + "step": 129000 + }, + { + "epoch": 3.31, + "learning_rate": 3.897598797691021e-05, + "loss": 2.8845, + "step": 129100 + }, + { + "epoch": 3.31, + "learning_rate": 3.89674488506336e-05, + "loss": 2.9282, + "step": 129200 + }, + { + "epoch": 3.31, + "learning_rate": 3.8958909724357005e-05, + "loss": 2.9483, + "step": 129300 + }, + { + "epoch": 3.31, + "learning_rate": 3.8950370598080406e-05, + "loss": 3.0091, + "step": 129400 + }, + { + "epoch": 3.32, + "learning_rate": 3.894183147180381e-05, + "loss": 2.9191, + "step": 129500 + }, + { + "epoch": 3.32, + "learning_rate": 3.89332923455272e-05, + "loss": 3.0748, + "step": 129600 + }, + { + "epoch": 3.32, + "learning_rate": 3.892475321925061e-05, + "loss": 2.9653, + "step": 129700 + }, + { + "epoch": 3.33, + "learning_rate": 3.8916214092974006e-05, + "loss": 2.7149, + "step": 129800 + }, + { + "epoch": 3.33, + "learning_rate": 3.890767496669741e-05, + "loss": 3.0052, + "step": 129900 + }, + { + "epoch": 3.33, + "learning_rate": 3.889913584042081e-05, + "loss": 3.1336, + "step": 130000 + }, + { + "epoch": 3.33, + "learning_rate": 3.889059671414421e-05, + "loss": 2.8969, + "step": 130100 + }, + { + "epoch": 3.34, + "learning_rate": 3.888205758786761e-05, + "loss": 2.817, + "step": 130200 + }, + { + "epoch": 3.34, + "learning_rate": 3.8873518461591014e-05, + "loss": 2.995, + "step": 130300 + }, + { + "epoch": 3.34, + "learning_rate": 3.886497933531441e-05, + "loss": 2.8422, + "step": 130400 + }, + { + "epoch": 3.34, + "learning_rate": 3.885644020903782e-05, + "loss": 2.9654, + "step": 130500 + }, + { + "epoch": 3.35, + "learning_rate": 3.884790108276121e-05, + "loss": 2.793, + "step": 130600 + }, + { + "epoch": 3.35, + "learning_rate": 3.883936195648461e-05, + "loss": 3.042, + "step": 130700 + }, + { + "epoch": 3.35, + "learning_rate": 3.8830822830208015e-05, + "loss": 3.0508, + "step": 130800 + }, + { + "epoch": 3.35, + "learning_rate": 3.8822283703931416e-05, + "loss": 2.8977, + "step": 130900 + }, + { + "epoch": 3.36, + "learning_rate": 3.881374457765482e-05, + "loss": 2.8101, + "step": 131000 + }, + { + "epoch": 3.36, + "learning_rate": 3.880520545137822e-05, + "loss": 2.9051, + "step": 131100 + }, + { + "epoch": 3.36, + "learning_rate": 3.8796666325101614e-05, + "loss": 2.9152, + "step": 131200 + }, + { + "epoch": 3.36, + "learning_rate": 3.878812719882502e-05, + "loss": 2.9743, + "step": 131300 + }, + { + "epoch": 3.37, + "learning_rate": 3.877958807254842e-05, + "loss": 3.0049, + "step": 131400 + }, + { + "epoch": 3.37, + "learning_rate": 3.877104894627182e-05, + "loss": 3.0142, + "step": 131500 + }, + { + "epoch": 3.37, + "learning_rate": 3.876250981999522e-05, + "loss": 2.9883, + "step": 131600 + }, + { + "epoch": 3.37, + "learning_rate": 3.875397069371862e-05, + "loss": 2.9499, + "step": 131700 + }, + { + "epoch": 3.38, + "learning_rate": 3.874543156744202e-05, + "loss": 2.8573, + "step": 131800 + }, + { + "epoch": 3.38, + "learning_rate": 3.8736892441165425e-05, + "loss": 2.9085, + "step": 131900 + }, + { + "epoch": 3.38, + "learning_rate": 3.872835331488882e-05, + "loss": 2.8178, + "step": 132000 + }, + { + "epoch": 3.38, + "learning_rate": 3.871981418861223e-05, + "loss": 2.8433, + "step": 132100 + }, + { + "epoch": 3.39, + "learning_rate": 3.871127506233562e-05, + "loss": 2.8564, + "step": 132200 + }, + { + "epoch": 3.39, + "learning_rate": 3.8702735936059024e-05, + "loss": 2.9895, + "step": 132300 + }, + { + "epoch": 3.39, + "learning_rate": 3.8694196809782426e-05, + "loss": 3.0891, + "step": 132400 + }, + { + "epoch": 3.39, + "learning_rate": 3.868565768350583e-05, + "loss": 2.7649, + "step": 132500 + }, + { + "epoch": 3.4, + "learning_rate": 3.867711855722922e-05, + "loss": 3.0051, + "step": 132600 + }, + { + "epoch": 3.4, + "learning_rate": 3.866857943095263e-05, + "loss": 2.9748, + "step": 132700 + }, + { + "epoch": 3.4, + "learning_rate": 3.8660040304676025e-05, + "loss": 2.8772, + "step": 132800 + }, + { + "epoch": 3.4, + "learning_rate": 3.865150117839943e-05, + "loss": 3.038, + "step": 132900 + }, + { + "epoch": 3.41, + "learning_rate": 3.864296205212283e-05, + "loss": 2.9276, + "step": 133000 + }, + { + "epoch": 3.41, + "learning_rate": 3.863442292584623e-05, + "loss": 2.8391, + "step": 133100 + }, + { + "epoch": 3.41, + "learning_rate": 3.862588379956963e-05, + "loss": 2.8938, + "step": 133200 + }, + { + "epoch": 3.41, + "learning_rate": 3.861734467329303e-05, + "loss": 2.5677, + "step": 133300 + }, + { + "epoch": 3.42, + "learning_rate": 3.860880554701643e-05, + "loss": 2.934, + "step": 133400 + }, + { + "epoch": 3.42, + "learning_rate": 3.8600266420739836e-05, + "loss": 2.9272, + "step": 133500 + }, + { + "epoch": 3.42, + "learning_rate": 3.859172729446323e-05, + "loss": 3.0463, + "step": 133600 + }, + { + "epoch": 3.43, + "learning_rate": 3.858318816818663e-05, + "loss": 3.1404, + "step": 133700 + }, + { + "epoch": 3.43, + "learning_rate": 3.8574649041910034e-05, + "loss": 2.9115, + "step": 133800 + }, + { + "epoch": 3.43, + "learning_rate": 3.8566109915633435e-05, + "loss": 2.9446, + "step": 133900 + }, + { + "epoch": 3.43, + "learning_rate": 3.855757078935684e-05, + "loss": 2.7483, + "step": 134000 + }, + { + "epoch": 3.44, + "learning_rate": 3.854903166308024e-05, + "loss": 3.0783, + "step": 134100 + }, + { + "epoch": 3.44, + "learning_rate": 3.854049253680363e-05, + "loss": 2.9553, + "step": 134200 + }, + { + "epoch": 3.44, + "learning_rate": 3.853195341052704e-05, + "loss": 2.896, + "step": 134300 + }, + { + "epoch": 3.44, + "learning_rate": 3.8523414284250436e-05, + "loss": 3.1303, + "step": 134400 + }, + { + "epoch": 3.45, + "learning_rate": 3.851487515797384e-05, + "loss": 3.0356, + "step": 134500 + }, + { + "epoch": 3.45, + "learning_rate": 3.850633603169724e-05, + "loss": 2.8436, + "step": 134600 + }, + { + "epoch": 3.45, + "learning_rate": 3.849779690542064e-05, + "loss": 2.9615, + "step": 134700 + }, + { + "epoch": 3.45, + "learning_rate": 3.8489257779144036e-05, + "loss": 2.8318, + "step": 134800 + }, + { + "epoch": 3.46, + "learning_rate": 3.8480718652867444e-05, + "loss": 3.1643, + "step": 134900 + }, + { + "epoch": 3.46, + "learning_rate": 3.847217952659084e-05, + "loss": 3.0151, + "step": 135000 + }, + { + "epoch": 3.46, + "learning_rate": 3.846364040031424e-05, + "loss": 3.0796, + "step": 135100 + }, + { + "epoch": 3.46, + "learning_rate": 3.845510127403764e-05, + "loss": 2.7781, + "step": 135200 + }, + { + "epoch": 3.47, + "learning_rate": 3.8446562147761044e-05, + "loss": 3.1224, + "step": 135300 + }, + { + "epoch": 3.47, + "learning_rate": 3.8438023021484445e-05, + "loss": 2.6578, + "step": 135400 + }, + { + "epoch": 3.47, + "learning_rate": 3.842948389520785e-05, + "loss": 2.9786, + "step": 135500 + }, + { + "epoch": 3.47, + "learning_rate": 3.842094476893124e-05, + "loss": 2.8441, + "step": 135600 + }, + { + "epoch": 3.48, + "learning_rate": 3.841240564265465e-05, + "loss": 2.7938, + "step": 135700 + }, + { + "epoch": 3.48, + "learning_rate": 3.8403866516378044e-05, + "loss": 3.0032, + "step": 135800 + }, + { + "epoch": 3.48, + "learning_rate": 3.8395327390101446e-05, + "loss": 2.8721, + "step": 135900 + }, + { + "epoch": 3.48, + "learning_rate": 3.838678826382485e-05, + "loss": 2.8084, + "step": 136000 + }, + { + "epoch": 3.49, + "learning_rate": 3.837824913754825e-05, + "loss": 2.8603, + "step": 136100 + }, + { + "epoch": 3.49, + "learning_rate": 3.836971001127165e-05, + "loss": 3.0363, + "step": 136200 + }, + { + "epoch": 3.49, + "learning_rate": 3.836117088499505e-05, + "loss": 2.9738, + "step": 136300 + }, + { + "epoch": 3.49, + "learning_rate": 3.835263175871845e-05, + "loss": 2.9322, + "step": 136400 + }, + { + "epoch": 3.5, + "learning_rate": 3.8344092632441855e-05, + "loss": 3.1189, + "step": 136500 + }, + { + "epoch": 3.5, + "learning_rate": 3.833555350616525e-05, + "loss": 2.8642, + "step": 136600 + }, + { + "epoch": 3.5, + "learning_rate": 3.832701437988865e-05, + "loss": 2.8171, + "step": 136700 + }, + { + "epoch": 3.5, + "learning_rate": 3.831847525361205e-05, + "loss": 3.0146, + "step": 136800 + }, + { + "epoch": 3.51, + "learning_rate": 3.8309936127335455e-05, + "loss": 2.9867, + "step": 136900 + }, + { + "epoch": 3.51, + "learning_rate": 3.8301397001058856e-05, + "loss": 2.8355, + "step": 137000 + }, + { + "epoch": 3.51, + "learning_rate": 3.829285787478226e-05, + "loss": 2.7731, + "step": 137100 + }, + { + "epoch": 3.51, + "learning_rate": 3.828431874850565e-05, + "loss": 2.9714, + "step": 137200 + }, + { + "epoch": 3.52, + "learning_rate": 3.827577962222906e-05, + "loss": 2.9709, + "step": 137300 + }, + { + "epoch": 3.52, + "learning_rate": 3.8267240495952456e-05, + "loss": 3.0123, + "step": 137400 + }, + { + "epoch": 3.52, + "learning_rate": 3.825870136967586e-05, + "loss": 2.9678, + "step": 137500 + }, + { + "epoch": 3.52, + "learning_rate": 3.825016224339926e-05, + "loss": 2.7263, + "step": 137600 + }, + { + "epoch": 3.53, + "learning_rate": 3.824162311712266e-05, + "loss": 2.9291, + "step": 137700 + }, + { + "epoch": 3.53, + "learning_rate": 3.8233083990846055e-05, + "loss": 2.848, + "step": 137800 + }, + { + "epoch": 3.53, + "learning_rate": 3.8224544864569463e-05, + "loss": 2.8997, + "step": 137900 + }, + { + "epoch": 3.54, + "learning_rate": 3.821600573829286e-05, + "loss": 3.0481, + "step": 138000 + }, + { + "epoch": 3.54, + "learning_rate": 3.820746661201626e-05, + "loss": 3.079, + "step": 138100 + }, + { + "epoch": 3.54, + "learning_rate": 3.819892748573966e-05, + "loss": 2.9975, + "step": 138200 + }, + { + "epoch": 3.54, + "learning_rate": 3.819038835946306e-05, + "loss": 3.0374, + "step": 138300 + }, + { + "epoch": 3.55, + "learning_rate": 3.8181849233186464e-05, + "loss": 2.912, + "step": 138400 + }, + { + "epoch": 3.55, + "learning_rate": 3.8173310106909866e-05, + "loss": 3.0378, + "step": 138500 + }, + { + "epoch": 3.55, + "learning_rate": 3.816477098063326e-05, + "loss": 2.9073, + "step": 138600 + }, + { + "epoch": 3.55, + "learning_rate": 3.815623185435667e-05, + "loss": 2.9153, + "step": 138700 + }, + { + "epoch": 3.56, + "learning_rate": 3.8147692728080064e-05, + "loss": 2.8937, + "step": 138800 + }, + { + "epoch": 3.56, + "learning_rate": 3.8139153601803465e-05, + "loss": 2.9533, + "step": 138900 + }, + { + "epoch": 3.56, + "learning_rate": 3.813061447552687e-05, + "loss": 2.923, + "step": 139000 + }, + { + "epoch": 3.56, + "learning_rate": 3.812207534925027e-05, + "loss": 2.9185, + "step": 139100 + }, + { + "epoch": 3.57, + "learning_rate": 3.811353622297367e-05, + "loss": 3.0708, + "step": 139200 + }, + { + "epoch": 3.57, + "learning_rate": 3.810499709669707e-05, + "loss": 3.0239, + "step": 139300 + }, + { + "epoch": 3.57, + "learning_rate": 3.8096457970420466e-05, + "loss": 3.0284, + "step": 139400 + }, + { + "epoch": 3.57, + "learning_rate": 3.8087918844143875e-05, + "loss": 2.762, + "step": 139500 + }, + { + "epoch": 3.58, + "learning_rate": 3.807937971786727e-05, + "loss": 2.975, + "step": 139600 + }, + { + "epoch": 3.58, + "learning_rate": 3.807084059159067e-05, + "loss": 3.1, + "step": 139700 + }, + { + "epoch": 3.58, + "learning_rate": 3.806230146531407e-05, + "loss": 2.9565, + "step": 139800 + }, + { + "epoch": 3.58, + "learning_rate": 3.8053762339037474e-05, + "loss": 2.8751, + "step": 139900 + }, + { + "epoch": 3.59, + "learning_rate": 3.804522321276087e-05, + "loss": 2.9852, + "step": 140000 + }, + { + "epoch": 3.59, + "learning_rate": 3.803668408648428e-05, + "loss": 2.8591, + "step": 140100 + }, + { + "epoch": 3.59, + "learning_rate": 3.802814496020767e-05, + "loss": 2.8044, + "step": 140200 + }, + { + "epoch": 3.59, + "learning_rate": 3.8019605833931073e-05, + "loss": 3.0422, + "step": 140300 + }, + { + "epoch": 3.6, + "learning_rate": 3.8011066707654475e-05, + "loss": 2.9013, + "step": 140400 + }, + { + "epoch": 3.6, + "learning_rate": 3.800252758137787e-05, + "loss": 2.9534, + "step": 140500 + }, + { + "epoch": 3.6, + "learning_rate": 3.799398845510128e-05, + "loss": 2.9336, + "step": 140600 + }, + { + "epoch": 3.6, + "learning_rate": 3.798544932882467e-05, + "loss": 3.0396, + "step": 140700 + }, + { + "epoch": 3.61, + "learning_rate": 3.7976910202548074e-05, + "loss": 2.8578, + "step": 140800 + }, + { + "epoch": 3.61, + "learning_rate": 3.7968371076271476e-05, + "loss": 3.0548, + "step": 140900 + }, + { + "epoch": 3.61, + "learning_rate": 3.795983194999488e-05, + "loss": 2.7049, + "step": 141000 + }, + { + "epoch": 3.61, + "learning_rate": 3.795129282371828e-05, + "loss": 2.9947, + "step": 141100 + }, + { + "epoch": 3.62, + "learning_rate": 3.794275369744168e-05, + "loss": 2.8426, + "step": 141200 + }, + { + "epoch": 3.62, + "learning_rate": 3.7934214571165075e-05, + "loss": 2.8844, + "step": 141300 + }, + { + "epoch": 3.62, + "learning_rate": 3.7925675444888484e-05, + "loss": 2.7712, + "step": 141400 + }, + { + "epoch": 3.62, + "learning_rate": 3.791713631861188e-05, + "loss": 2.8845, + "step": 141500 + }, + { + "epoch": 3.63, + "learning_rate": 3.790859719233528e-05, + "loss": 2.8524, + "step": 141600 + }, + { + "epoch": 3.63, + "learning_rate": 3.790005806605868e-05, + "loss": 2.6535, + "step": 141700 + }, + { + "epoch": 3.63, + "learning_rate": 3.789151893978208e-05, + "loss": 2.9005, + "step": 141800 + }, + { + "epoch": 3.64, + "learning_rate": 3.7882979813505485e-05, + "loss": 2.9857, + "step": 141900 + }, + { + "epoch": 3.64, + "learning_rate": 3.7874440687228886e-05, + "loss": 2.7747, + "step": 142000 + }, + { + "epoch": 3.64, + "learning_rate": 3.786590156095228e-05, + "loss": 3.0335, + "step": 142100 + }, + { + "epoch": 3.64, + "learning_rate": 3.785736243467569e-05, + "loss": 2.82, + "step": 142200 + }, + { + "epoch": 3.65, + "learning_rate": 3.7848823308399084e-05, + "loss": 3.0067, + "step": 142300 + }, + { + "epoch": 3.65, + "learning_rate": 3.7840284182122486e-05, + "loss": 2.9935, + "step": 142400 + }, + { + "epoch": 3.65, + "learning_rate": 3.783174505584589e-05, + "loss": 2.9691, + "step": 142500 + }, + { + "epoch": 3.65, + "learning_rate": 3.782320592956929e-05, + "loss": 2.9787, + "step": 142600 + }, + { + "epoch": 3.66, + "learning_rate": 3.7814666803292684e-05, + "loss": 3.0089, + "step": 142700 + }, + { + "epoch": 3.66, + "learning_rate": 3.780612767701609e-05, + "loss": 3.0469, + "step": 142800 + }, + { + "epoch": 3.66, + "learning_rate": 3.779758855073949e-05, + "loss": 3.0362, + "step": 142900 + }, + { + "epoch": 3.66, + "learning_rate": 3.778904942446289e-05, + "loss": 2.9551, + "step": 143000 + }, + { + "epoch": 3.67, + "learning_rate": 3.778051029818629e-05, + "loss": 2.6902, + "step": 143100 + }, + { + "epoch": 3.67, + "learning_rate": 3.777197117190969e-05, + "loss": 2.8595, + "step": 143200 + }, + { + "epoch": 3.67, + "learning_rate": 3.776343204563309e-05, + "loss": 2.8926, + "step": 143300 + }, + { + "epoch": 3.67, + "learning_rate": 3.7754892919356494e-05, + "loss": 2.8702, + "step": 143400 + }, + { + "epoch": 3.68, + "learning_rate": 3.774635379307989e-05, + "loss": 2.9157, + "step": 143500 + }, + { + "epoch": 3.68, + "learning_rate": 3.77378146668033e-05, + "loss": 2.8203, + "step": 143600 + }, + { + "epoch": 3.68, + "learning_rate": 3.772927554052669e-05, + "loss": 2.9764, + "step": 143700 + }, + { + "epoch": 3.68, + "learning_rate": 3.7720736414250094e-05, + "loss": 3.0338, + "step": 143800 + }, + { + "epoch": 3.69, + "learning_rate": 3.7712197287973495e-05, + "loss": 2.8205, + "step": 143900 + }, + { + "epoch": 3.69, + "learning_rate": 3.77036581616969e-05, + "loss": 2.9368, + "step": 144000 + }, + { + "epoch": 3.69, + "learning_rate": 3.76951190354203e-05, + "loss": 3.0391, + "step": 144100 + }, + { + "epoch": 3.69, + "learning_rate": 3.76865799091437e-05, + "loss": 2.9394, + "step": 144200 + }, + { + "epoch": 3.7, + "learning_rate": 3.7678040782867095e-05, + "loss": 3.0125, + "step": 144300 + }, + { + "epoch": 3.7, + "learning_rate": 3.76695016565905e-05, + "loss": 2.9793, + "step": 144400 + }, + { + "epoch": 3.7, + "learning_rate": 3.76609625303139e-05, + "loss": 3.0127, + "step": 144500 + }, + { + "epoch": 3.7, + "learning_rate": 3.76524234040373e-05, + "loss": 2.9295, + "step": 144600 + }, + { + "epoch": 3.71, + "learning_rate": 3.76438842777607e-05, + "loss": 2.926, + "step": 144700 + }, + { + "epoch": 3.71, + "learning_rate": 3.76353451514841e-05, + "loss": 2.8679, + "step": 144800 + }, + { + "epoch": 3.71, + "learning_rate": 3.76268060252075e-05, + "loss": 2.874, + "step": 144900 + }, + { + "epoch": 3.71, + "learning_rate": 3.7618266898930906e-05, + "loss": 2.8243, + "step": 145000 + }, + { + "epoch": 3.72, + "learning_rate": 3.76097277726543e-05, + "loss": 2.6775, + "step": 145100 + }, + { + "epoch": 3.72, + "learning_rate": 3.76011886463777e-05, + "loss": 2.9936, + "step": 145200 + }, + { + "epoch": 3.72, + "learning_rate": 3.7592649520101103e-05, + "loss": 2.87, + "step": 145300 + }, + { + "epoch": 3.72, + "learning_rate": 3.7584110393824505e-05, + "loss": 2.6478, + "step": 145400 + }, + { + "epoch": 3.73, + "learning_rate": 3.7575571267547907e-05, + "loss": 3.1228, + "step": 145500 + }, + { + "epoch": 3.73, + "learning_rate": 3.756703214127131e-05, + "loss": 2.9155, + "step": 145600 + }, + { + "epoch": 3.73, + "learning_rate": 3.75584930149947e-05, + "loss": 2.9365, + "step": 145700 + }, + { + "epoch": 3.74, + "learning_rate": 3.754995388871811e-05, + "loss": 2.9418, + "step": 145800 + }, + { + "epoch": 3.74, + "learning_rate": 3.7541414762441506e-05, + "loss": 2.8428, + "step": 145900 + }, + { + "epoch": 3.74, + "learning_rate": 3.753287563616491e-05, + "loss": 2.9287, + "step": 146000 + }, + { + "epoch": 3.74, + "learning_rate": 3.752433650988831e-05, + "loss": 2.7329, + "step": 146100 + }, + { + "epoch": 3.75, + "learning_rate": 3.751579738361171e-05, + "loss": 2.9242, + "step": 146200 + }, + { + "epoch": 3.75, + "learning_rate": 3.750725825733511e-05, + "loss": 3.0612, + "step": 146300 + }, + { + "epoch": 3.75, + "learning_rate": 3.7498719131058514e-05, + "loss": 2.8653, + "step": 146400 + }, + { + "epoch": 3.75, + "learning_rate": 3.749018000478191e-05, + "loss": 2.989, + "step": 146500 + }, + { + "epoch": 3.76, + "learning_rate": 3.748164087850532e-05, + "loss": 2.9249, + "step": 146600 + }, + { + "epoch": 3.76, + "learning_rate": 3.747310175222871e-05, + "loss": 2.9209, + "step": 146700 + }, + { + "epoch": 3.76, + "learning_rate": 3.746456262595211e-05, + "loss": 2.7595, + "step": 146800 + }, + { + "epoch": 3.76, + "learning_rate": 3.7456023499675515e-05, + "loss": 2.8333, + "step": 146900 + }, + { + "epoch": 3.77, + "learning_rate": 3.7447484373398916e-05, + "loss": 2.886, + "step": 147000 + }, + { + "epoch": 3.77, + "learning_rate": 3.743894524712232e-05, + "loss": 2.806, + "step": 147100 + }, + { + "epoch": 3.77, + "learning_rate": 3.743040612084572e-05, + "loss": 2.8587, + "step": 147200 + }, + { + "epoch": 3.77, + "learning_rate": 3.7421866994569114e-05, + "loss": 2.8406, + "step": 147300 + }, + { + "epoch": 3.78, + "learning_rate": 3.741332786829252e-05, + "loss": 2.7372, + "step": 147400 + }, + { + "epoch": 3.78, + "learning_rate": 3.740478874201592e-05, + "loss": 3.1244, + "step": 147500 + }, + { + "epoch": 3.78, + "learning_rate": 3.739624961573932e-05, + "loss": 2.8076, + "step": 147600 + }, + { + "epoch": 3.78, + "learning_rate": 3.738771048946272e-05, + "loss": 2.7125, + "step": 147700 + }, + { + "epoch": 3.79, + "learning_rate": 3.737917136318612e-05, + "loss": 2.8492, + "step": 147800 + }, + { + "epoch": 3.79, + "learning_rate": 3.737063223690952e-05, + "loss": 2.9047, + "step": 147900 + }, + { + "epoch": 3.79, + "learning_rate": 3.7362093110632925e-05, + "loss": 2.7274, + "step": 148000 + }, + { + "epoch": 3.79, + "learning_rate": 3.735355398435632e-05, + "loss": 2.96, + "step": 148100 + }, + { + "epoch": 3.8, + "learning_rate": 3.734501485807972e-05, + "loss": 2.8808, + "step": 148200 + }, + { + "epoch": 3.8, + "learning_rate": 3.733647573180312e-05, + "loss": 2.749, + "step": 148300 + }, + { + "epoch": 3.8, + "learning_rate": 3.7327936605526524e-05, + "loss": 2.6918, + "step": 148400 + }, + { + "epoch": 3.8, + "learning_rate": 3.7319397479249926e-05, + "loss": 3.0696, + "step": 148500 + }, + { + "epoch": 3.81, + "learning_rate": 3.731085835297333e-05, + "loss": 2.9101, + "step": 148600 + }, + { + "epoch": 3.81, + "learning_rate": 3.730231922669672e-05, + "loss": 3.0157, + "step": 148700 + }, + { + "epoch": 3.81, + "learning_rate": 3.729378010042013e-05, + "loss": 3.138, + "step": 148800 + }, + { + "epoch": 3.81, + "learning_rate": 3.7285240974143525e-05, + "loss": 2.9909, + "step": 148900 + }, + { + "epoch": 3.82, + "learning_rate": 3.727670184786693e-05, + "loss": 2.6905, + "step": 149000 + }, + { + "epoch": 3.82, + "learning_rate": 3.726816272159033e-05, + "loss": 2.7996, + "step": 149100 + }, + { + "epoch": 3.82, + "learning_rate": 3.725962359531373e-05, + "loss": 2.8014, + "step": 149200 + }, + { + "epoch": 3.82, + "learning_rate": 3.725108446903713e-05, + "loss": 2.914, + "step": 149300 + }, + { + "epoch": 3.83, + "learning_rate": 3.724254534276053e-05, + "loss": 2.9544, + "step": 149400 + }, + { + "epoch": 3.83, + "learning_rate": 3.723400621648393e-05, + "loss": 2.7879, + "step": 149500 + }, + { + "epoch": 3.83, + "learning_rate": 3.7225467090207336e-05, + "loss": 2.741, + "step": 149600 + }, + { + "epoch": 3.83, + "learning_rate": 3.721692796393073e-05, + "loss": 2.9305, + "step": 149700 + }, + { + "epoch": 3.84, + "learning_rate": 3.720838883765413e-05, + "loss": 2.8774, + "step": 149800 + }, + { + "epoch": 3.84, + "learning_rate": 3.7199849711377534e-05, + "loss": 2.8693, + "step": 149900 + }, + { + "epoch": 3.84, + "learning_rate": 3.7191310585100936e-05, + "loss": 2.8799, + "step": 150000 + }, + { + "epoch": 3.85, + "learning_rate": 3.718277145882433e-05, + "loss": 2.9987, + "step": 150100 + }, + { + "epoch": 3.85, + "learning_rate": 3.717423233254774e-05, + "loss": 3.0406, + "step": 150200 + }, + { + "epoch": 3.85, + "learning_rate": 3.7165693206271133e-05, + "loss": 2.8638, + "step": 150300 + }, + { + "epoch": 3.85, + "learning_rate": 3.7157154079994535e-05, + "loss": 3.0982, + "step": 150400 + }, + { + "epoch": 3.86, + "learning_rate": 3.7148614953717937e-05, + "loss": 2.8802, + "step": 150500 + }, + { + "epoch": 3.86, + "learning_rate": 3.714007582744134e-05, + "loss": 2.8833, + "step": 150600 + }, + { + "epoch": 3.86, + "learning_rate": 3.713153670116474e-05, + "loss": 2.9561, + "step": 150700 + }, + { + "epoch": 3.86, + "learning_rate": 3.712299757488814e-05, + "loss": 2.853, + "step": 150800 + }, + { + "epoch": 3.87, + "learning_rate": 3.7114458448611536e-05, + "loss": 3.0224, + "step": 150900 + }, + { + "epoch": 3.87, + "learning_rate": 3.7105919322334944e-05, + "loss": 3.076, + "step": 151000 + }, + { + "epoch": 3.87, + "learning_rate": 3.709738019605834e-05, + "loss": 2.7872, + "step": 151100 + }, + { + "epoch": 3.87, + "learning_rate": 3.708884106978174e-05, + "loss": 2.9373, + "step": 151200 + }, + { + "epoch": 3.88, + "learning_rate": 3.708030194350514e-05, + "loss": 3.0687, + "step": 151300 + }, + { + "epoch": 3.88, + "learning_rate": 3.7071762817228544e-05, + "loss": 2.9908, + "step": 151400 + }, + { + "epoch": 3.88, + "learning_rate": 3.7063223690951945e-05, + "loss": 3.005, + "step": 151500 + }, + { + "epoch": 3.88, + "learning_rate": 3.705468456467535e-05, + "loss": 2.8163, + "step": 151600 + }, + { + "epoch": 3.89, + "learning_rate": 3.704614543839874e-05, + "loss": 2.804, + "step": 151700 + }, + { + "epoch": 3.89, + "learning_rate": 3.703760631212215e-05, + "loss": 2.8771, + "step": 151800 + }, + { + "epoch": 3.89, + "learning_rate": 3.7029067185845545e-05, + "loss": 2.7535, + "step": 151900 + }, + { + "epoch": 3.89, + "learning_rate": 3.7020528059568946e-05, + "loss": 3.0275, + "step": 152000 + }, + { + "epoch": 3.9, + "learning_rate": 3.701198893329235e-05, + "loss": 3.0363, + "step": 152100 + }, + { + "epoch": 3.9, + "learning_rate": 3.700344980701575e-05, + "loss": 2.7471, + "step": 152200 + }, + { + "epoch": 3.9, + "learning_rate": 3.699491068073915e-05, + "loss": 2.9191, + "step": 152300 + }, + { + "epoch": 3.9, + "learning_rate": 3.698637155446255e-05, + "loss": 3.0247, + "step": 152400 + }, + { + "epoch": 3.91, + "learning_rate": 3.697783242818595e-05, + "loss": 2.8533, + "step": 152500 + }, + { + "epoch": 3.91, + "learning_rate": 3.6969293301909356e-05, + "loss": 2.8196, + "step": 152600 + }, + { + "epoch": 3.91, + "learning_rate": 3.696075417563275e-05, + "loss": 2.9538, + "step": 152700 + }, + { + "epoch": 3.91, + "learning_rate": 3.695221504935615e-05, + "loss": 2.973, + "step": 152800 + }, + { + "epoch": 3.92, + "learning_rate": 3.694367592307955e-05, + "loss": 2.8075, + "step": 152900 + }, + { + "epoch": 3.92, + "learning_rate": 3.6935136796802955e-05, + "loss": 2.8664, + "step": 153000 + }, + { + "epoch": 3.92, + "learning_rate": 3.692659767052635e-05, + "loss": 3.0199, + "step": 153100 + }, + { + "epoch": 3.92, + "learning_rate": 3.691805854424976e-05, + "loss": 2.7264, + "step": 153200 + }, + { + "epoch": 3.93, + "learning_rate": 3.690951941797315e-05, + "loss": 2.7767, + "step": 153300 + }, + { + "epoch": 3.93, + "learning_rate": 3.6900980291696554e-05, + "loss": 2.8702, + "step": 153400 + }, + { + "epoch": 3.93, + "learning_rate": 3.6892441165419956e-05, + "loss": 2.8619, + "step": 153500 + }, + { + "epoch": 3.93, + "learning_rate": 3.688390203914336e-05, + "loss": 2.7582, + "step": 153600 + }, + { + "epoch": 3.94, + "learning_rate": 3.687536291286676e-05, + "loss": 2.8249, + "step": 153700 + }, + { + "epoch": 3.94, + "learning_rate": 3.686682378659016e-05, + "loss": 2.9421, + "step": 153800 + }, + { + "epoch": 3.94, + "learning_rate": 3.6858284660313555e-05, + "loss": 2.9268, + "step": 153900 + }, + { + "epoch": 3.95, + "learning_rate": 3.6849745534036964e-05, + "loss": 2.9982, + "step": 154000 + }, + { + "epoch": 3.95, + "learning_rate": 3.684120640776036e-05, + "loss": 2.8282, + "step": 154100 + }, + { + "epoch": 3.95, + "learning_rate": 3.683266728148376e-05, + "loss": 2.6538, + "step": 154200 + }, + { + "epoch": 3.95, + "learning_rate": 3.682412815520716e-05, + "loss": 2.9332, + "step": 154300 + }, + { + "epoch": 3.96, + "learning_rate": 3.681558902893056e-05, + "loss": 2.9436, + "step": 154400 + }, + { + "epoch": 3.96, + "learning_rate": 3.6807049902653965e-05, + "loss": 2.9657, + "step": 154500 + }, + { + "epoch": 3.96, + "learning_rate": 3.6798510776377366e-05, + "loss": 2.8455, + "step": 154600 + }, + { + "epoch": 3.96, + "learning_rate": 3.678997165010076e-05, + "loss": 3.0626, + "step": 154700 + }, + { + "epoch": 3.97, + "learning_rate": 3.678143252382417e-05, + "loss": 3.0584, + "step": 154800 + }, + { + "epoch": 3.97, + "learning_rate": 3.6772893397547564e-05, + "loss": 3.0656, + "step": 154900 + }, + { + "epoch": 3.97, + "learning_rate": 3.6764354271270966e-05, + "loss": 2.9965, + "step": 155000 + }, + { + "epoch": 3.97, + "learning_rate": 3.675581514499437e-05, + "loss": 3.1522, + "step": 155100 + }, + { + "epoch": 3.98, + "learning_rate": 3.674727601871777e-05, + "loss": 2.9915, + "step": 155200 + }, + { + "epoch": 3.98, + "learning_rate": 3.6738736892441163e-05, + "loss": 2.927, + "step": 155300 + }, + { + "epoch": 3.98, + "learning_rate": 3.673019776616457e-05, + "loss": 3.0846, + "step": 155400 + }, + { + "epoch": 3.98, + "learning_rate": 3.6721658639887967e-05, + "loss": 2.9297, + "step": 155500 + }, + { + "epoch": 3.99, + "learning_rate": 3.6713119513611375e-05, + "loss": 2.8472, + "step": 155600 + }, + { + "epoch": 3.99, + "learning_rate": 3.670458038733477e-05, + "loss": 2.7157, + "step": 155700 + }, + { + "epoch": 3.99, + "learning_rate": 3.669604126105817e-05, + "loss": 2.8304, + "step": 155800 + }, + { + "epoch": 3.99, + "learning_rate": 3.668750213478157e-05, + "loss": 2.9214, + "step": 155900 + }, + { + "epoch": 4.0, + "learning_rate": 3.6678963008504974e-05, + "loss": 2.8294, + "step": 156000 + }, + { + "epoch": 4.0, + "learning_rate": 3.667042388222837e-05, + "loss": 2.7344, + "step": 156100 + }, + { + "epoch": 4.0, + "learning_rate": 3.666188475595178e-05, + "loss": 2.9135, + "step": 156200 + }, + { + "epoch": 4.0, + "learning_rate": 3.665334562967517e-05, + "loss": 2.7938, + "step": 156300 + }, + { + "epoch": 4.01, + "learning_rate": 3.6644806503398574e-05, + "loss": 2.7882, + "step": 156400 + }, + { + "epoch": 4.01, + "learning_rate": 3.6636267377121975e-05, + "loss": 2.7811, + "step": 156500 + }, + { + "epoch": 4.01, + "learning_rate": 3.662772825084538e-05, + "loss": 2.8213, + "step": 156600 + }, + { + "epoch": 4.01, + "learning_rate": 3.661918912456878e-05, + "loss": 2.8907, + "step": 156700 + }, + { + "epoch": 4.02, + "learning_rate": 3.661064999829218e-05, + "loss": 2.8295, + "step": 156800 + }, + { + "epoch": 4.02, + "learning_rate": 3.6602110872015575e-05, + "loss": 2.7955, + "step": 156900 + }, + { + "epoch": 4.02, + "learning_rate": 3.659357174573898e-05, + "loss": 2.9084, + "step": 157000 + }, + { + "epoch": 4.02, + "learning_rate": 3.658503261946238e-05, + "loss": 2.7326, + "step": 157100 + }, + { + "epoch": 4.03, + "learning_rate": 3.657649349318578e-05, + "loss": 2.8411, + "step": 157200 + }, + { + "epoch": 4.03, + "learning_rate": 3.656795436690918e-05, + "loss": 2.9072, + "step": 157300 + }, + { + "epoch": 4.03, + "learning_rate": 3.655941524063258e-05, + "loss": 2.7981, + "step": 157400 + }, + { + "epoch": 4.03, + "learning_rate": 3.6550876114355984e-05, + "loss": 2.7779, + "step": 157500 + }, + { + "epoch": 4.04, + "learning_rate": 3.6542336988079385e-05, + "loss": 3.0451, + "step": 157600 + }, + { + "epoch": 4.04, + "learning_rate": 3.653379786180278e-05, + "loss": 3.1684, + "step": 157700 + }, + { + "epoch": 4.04, + "learning_rate": 3.652525873552618e-05, + "loss": 2.9367, + "step": 157800 + }, + { + "epoch": 4.04, + "learning_rate": 3.651671960924958e-05, + "loss": 2.8674, + "step": 157900 + }, + { + "epoch": 4.05, + "learning_rate": 3.650818048297298e-05, + "loss": 2.9975, + "step": 158000 + }, + { + "epoch": 4.05, + "learning_rate": 3.6499641356696386e-05, + "loss": 3.1238, + "step": 158100 + }, + { + "epoch": 4.05, + "learning_rate": 3.649110223041978e-05, + "loss": 2.8777, + "step": 158200 + }, + { + "epoch": 4.06, + "learning_rate": 3.648256310414318e-05, + "loss": 2.7382, + "step": 158300 + }, + { + "epoch": 4.06, + "learning_rate": 3.6474023977866584e-05, + "loss": 3.0051, + "step": 158400 + }, + { + "epoch": 4.06, + "learning_rate": 3.6465484851589986e-05, + "loss": 2.9901, + "step": 158500 + }, + { + "epoch": 4.06, + "learning_rate": 3.645694572531339e-05, + "loss": 2.9462, + "step": 158600 + }, + { + "epoch": 4.07, + "learning_rate": 3.644840659903679e-05, + "loss": 3.0967, + "step": 158700 + }, + { + "epoch": 4.07, + "learning_rate": 3.6439867472760184e-05, + "loss": 2.8192, + "step": 158800 + }, + { + "epoch": 4.07, + "learning_rate": 3.643132834648359e-05, + "loss": 3.0004, + "step": 158900 + }, + { + "epoch": 4.07, + "learning_rate": 3.642278922020699e-05, + "loss": 2.8679, + "step": 159000 + }, + { + "epoch": 4.08, + "learning_rate": 3.641425009393039e-05, + "loss": 2.6969, + "step": 159100 + }, + { + "epoch": 4.08, + "learning_rate": 3.640571096765379e-05, + "loss": 2.8129, + "step": 159200 + }, + { + "epoch": 4.08, + "learning_rate": 3.639717184137719e-05, + "loss": 2.9315, + "step": 159300 + }, + { + "epoch": 4.08, + "learning_rate": 3.638863271510059e-05, + "loss": 2.8286, + "step": 159400 + }, + { + "epoch": 4.09, + "learning_rate": 3.6380093588823995e-05, + "loss": 2.8636, + "step": 159500 + }, + { + "epoch": 4.09, + "learning_rate": 3.637155446254739e-05, + "loss": 2.8053, + "step": 159600 + }, + { + "epoch": 4.09, + "learning_rate": 3.63630153362708e-05, + "loss": 2.8558, + "step": 159700 + }, + { + "epoch": 4.09, + "learning_rate": 3.635447620999419e-05, + "loss": 2.8891, + "step": 159800 + }, + { + "epoch": 4.1, + "learning_rate": 3.6345937083717594e-05, + "loss": 2.8303, + "step": 159900 + }, + { + "epoch": 4.1, + "learning_rate": 3.6337397957440996e-05, + "loss": 2.8755, + "step": 160000 + }, + { + "epoch": 4.1, + "learning_rate": 3.63288588311644e-05, + "loss": 2.9814, + "step": 160100 + }, + { + "epoch": 4.1, + "learning_rate": 3.632031970488779e-05, + "loss": 2.9474, + "step": 160200 + }, + { + "epoch": 4.11, + "learning_rate": 3.63117805786112e-05, + "loss": 2.7752, + "step": 160300 + }, + { + "epoch": 4.11, + "learning_rate": 3.6303241452334595e-05, + "loss": 2.9008, + "step": 160400 + }, + { + "epoch": 4.11, + "learning_rate": 3.6294702326058e-05, + "loss": 2.8035, + "step": 160500 + }, + { + "epoch": 4.11, + "learning_rate": 3.62861631997814e-05, + "loss": 2.899, + "step": 160600 + }, + { + "epoch": 4.12, + "learning_rate": 3.62776240735048e-05, + "loss": 3.0576, + "step": 160700 + }, + { + "epoch": 4.12, + "learning_rate": 3.62690849472282e-05, + "loss": 2.9951, + "step": 160800 + }, + { + "epoch": 4.12, + "learning_rate": 3.62605458209516e-05, + "loss": 2.7915, + "step": 160900 + }, + { + "epoch": 4.12, + "learning_rate": 3.6252006694675e-05, + "loss": 3.0475, + "step": 161000 + }, + { + "epoch": 4.13, + "learning_rate": 3.6243467568398406e-05, + "loss": 2.8174, + "step": 161100 + }, + { + "epoch": 4.13, + "learning_rate": 3.62349284421218e-05, + "loss": 2.8561, + "step": 161200 + }, + { + "epoch": 4.13, + "learning_rate": 3.62263893158452e-05, + "loss": 2.9166, + "step": 161300 + }, + { + "epoch": 4.13, + "learning_rate": 3.6217850189568604e-05, + "loss": 2.8769, + "step": 161400 + }, + { + "epoch": 4.14, + "learning_rate": 3.6209311063292005e-05, + "loss": 2.9089, + "step": 161500 + }, + { + "epoch": 4.14, + "learning_rate": 3.620077193701541e-05, + "loss": 2.9342, + "step": 161600 + }, + { + "epoch": 4.14, + "learning_rate": 3.619223281073881e-05, + "loss": 2.7861, + "step": 161700 + }, + { + "epoch": 4.14, + "learning_rate": 3.61836936844622e-05, + "loss": 2.9156, + "step": 161800 + }, + { + "epoch": 4.15, + "learning_rate": 3.617515455818561e-05, + "loss": 2.8686, + "step": 161900 + }, + { + "epoch": 4.15, + "learning_rate": 3.6166615431909006e-05, + "loss": 2.8429, + "step": 162000 + }, + { + "epoch": 4.15, + "learning_rate": 3.615807630563241e-05, + "loss": 2.917, + "step": 162100 + }, + { + "epoch": 4.16, + "learning_rate": 3.614953717935581e-05, + "loss": 2.9653, + "step": 162200 + }, + { + "epoch": 4.16, + "learning_rate": 3.614099805307921e-05, + "loss": 2.9225, + "step": 162300 + }, + { + "epoch": 4.16, + "learning_rate": 3.613245892680261e-05, + "loss": 2.9669, + "step": 162400 + }, + { + "epoch": 4.16, + "learning_rate": 3.6123919800526014e-05, + "loss": 2.9289, + "step": 162500 + }, + { + "epoch": 4.17, + "learning_rate": 3.611538067424941e-05, + "loss": 2.8101, + "step": 162600 + }, + { + "epoch": 4.17, + "learning_rate": 3.610684154797282e-05, + "loss": 2.7668, + "step": 162700 + }, + { + "epoch": 4.17, + "learning_rate": 3.609830242169621e-05, + "loss": 2.6915, + "step": 162800 + }, + { + "epoch": 4.17, + "learning_rate": 3.608976329541961e-05, + "loss": 2.8424, + "step": 162900 + }, + { + "epoch": 4.18, + "learning_rate": 3.6081224169143015e-05, + "loss": 2.9561, + "step": 163000 + }, + { + "epoch": 4.18, + "learning_rate": 3.6072685042866416e-05, + "loss": 2.8571, + "step": 163100 + }, + { + "epoch": 4.18, + "learning_rate": 3.606414591658981e-05, + "loss": 3.0197, + "step": 163200 + }, + { + "epoch": 4.18, + "learning_rate": 3.605560679031322e-05, + "loss": 2.8519, + "step": 163300 + }, + { + "epoch": 4.19, + "learning_rate": 3.6047067664036614e-05, + "loss": 2.6463, + "step": 163400 + }, + { + "epoch": 4.19, + "learning_rate": 3.6038528537760016e-05, + "loss": 2.6629, + "step": 163500 + }, + { + "epoch": 4.19, + "learning_rate": 3.602998941148342e-05, + "loss": 2.8949, + "step": 163600 + }, + { + "epoch": 4.19, + "learning_rate": 3.602145028520682e-05, + "loss": 2.709, + "step": 163700 + }, + { + "epoch": 4.2, + "learning_rate": 3.601291115893022e-05, + "loss": 2.7052, + "step": 163800 + }, + { + "epoch": 4.2, + "learning_rate": 3.600437203265362e-05, + "loss": 2.7292, + "step": 163900 + }, + { + "epoch": 4.2, + "learning_rate": 3.599583290637702e-05, + "loss": 2.7062, + "step": 164000 + }, + { + "epoch": 4.2, + "learning_rate": 3.5987293780100425e-05, + "loss": 2.8184, + "step": 164100 + }, + { + "epoch": 4.21, + "learning_rate": 3.597875465382382e-05, + "loss": 2.8715, + "step": 164200 + }, + { + "epoch": 4.21, + "learning_rate": 3.597021552754722e-05, + "loss": 2.9527, + "step": 164300 + }, + { + "epoch": 4.21, + "learning_rate": 3.596167640127062e-05, + "loss": 2.9549, + "step": 164400 + }, + { + "epoch": 4.21, + "learning_rate": 3.5953137274994025e-05, + "loss": 2.7693, + "step": 164500 + }, + { + "epoch": 4.22, + "learning_rate": 3.5944598148717426e-05, + "loss": 2.847, + "step": 164600 + }, + { + "epoch": 4.22, + "learning_rate": 3.593605902244083e-05, + "loss": 2.9589, + "step": 164700 + }, + { + "epoch": 4.22, + "learning_rate": 3.592751989616422e-05, + "loss": 2.7985, + "step": 164800 + }, + { + "epoch": 4.22, + "learning_rate": 3.591898076988763e-05, + "loss": 2.7499, + "step": 164900 + }, + { + "epoch": 4.23, + "learning_rate": 3.5910441643611026e-05, + "loss": 2.8699, + "step": 165000 + }, + { + "epoch": 4.23, + "learning_rate": 3.590190251733443e-05, + "loss": 2.9339, + "step": 165100 + }, + { + "epoch": 4.23, + "learning_rate": 3.589336339105783e-05, + "loss": 2.7762, + "step": 165200 + }, + { + "epoch": 4.23, + "learning_rate": 3.588482426478123e-05, + "loss": 2.6134, + "step": 165300 + }, + { + "epoch": 4.24, + "learning_rate": 3.587628513850463e-05, + "loss": 2.8161, + "step": 165400 + }, + { + "epoch": 4.24, + "learning_rate": 3.586774601222803e-05, + "loss": 2.9087, + "step": 165500 + }, + { + "epoch": 4.24, + "learning_rate": 3.585920688595143e-05, + "loss": 2.7699, + "step": 165600 + }, + { + "epoch": 4.24, + "learning_rate": 3.5850667759674836e-05, + "loss": 2.9153, + "step": 165700 + }, + { + "epoch": 4.25, + "learning_rate": 3.584212863339823e-05, + "loss": 2.9153, + "step": 165800 + }, + { + "epoch": 4.25, + "learning_rate": 3.583358950712163e-05, + "loss": 2.8115, + "step": 165900 + }, + { + "epoch": 4.25, + "learning_rate": 3.5825050380845034e-05, + "loss": 2.6892, + "step": 166000 + }, + { + "epoch": 4.26, + "learning_rate": 3.5816511254568436e-05, + "loss": 2.7988, + "step": 166100 + }, + { + "epoch": 4.26, + "learning_rate": 3.580797212829183e-05, + "loss": 2.9257, + "step": 166200 + }, + { + "epoch": 4.26, + "learning_rate": 3.579943300201524e-05, + "loss": 2.8666, + "step": 166300 + }, + { + "epoch": 4.26, + "learning_rate": 3.5790893875738634e-05, + "loss": 2.7994, + "step": 166400 + }, + { + "epoch": 4.27, + "learning_rate": 3.5782354749462035e-05, + "loss": 2.9492, + "step": 166500 + }, + { + "epoch": 4.27, + "learning_rate": 3.577381562318544e-05, + "loss": 2.8635, + "step": 166600 + }, + { + "epoch": 4.27, + "learning_rate": 3.576527649690884e-05, + "loss": 2.868, + "step": 166700 + }, + { + "epoch": 4.27, + "learning_rate": 3.575673737063224e-05, + "loss": 2.887, + "step": 166800 + }, + { + "epoch": 4.28, + "learning_rate": 3.574819824435564e-05, + "loss": 2.8005, + "step": 166900 + }, + { + "epoch": 4.28, + "learning_rate": 3.5739659118079036e-05, + "loss": 2.905, + "step": 167000 + }, + { + "epoch": 4.28, + "learning_rate": 3.5731119991802444e-05, + "loss": 2.7722, + "step": 167100 + }, + { + "epoch": 4.28, + "learning_rate": 3.572258086552584e-05, + "loss": 2.836, + "step": 167200 + }, + { + "epoch": 4.29, + "learning_rate": 3.571404173924924e-05, + "loss": 2.9579, + "step": 167300 + }, + { + "epoch": 4.29, + "learning_rate": 3.570550261297264e-05, + "loss": 2.7944, + "step": 167400 + }, + { + "epoch": 4.29, + "learning_rate": 3.5696963486696044e-05, + "loss": 2.7011, + "step": 167500 + }, + { + "epoch": 4.29, + "learning_rate": 3.5688424360419445e-05, + "loss": 2.8905, + "step": 167600 + }, + { + "epoch": 4.3, + "learning_rate": 3.567988523414285e-05, + "loss": 2.9255, + "step": 167700 + }, + { + "epoch": 4.3, + "learning_rate": 3.567134610786624e-05, + "loss": 2.919, + "step": 167800 + }, + { + "epoch": 4.3, + "learning_rate": 3.566280698158965e-05, + "loss": 2.9211, + "step": 167900 + }, + { + "epoch": 4.3, + "learning_rate": 3.5654267855313045e-05, + "loss": 2.7088, + "step": 168000 + }, + { + "epoch": 4.31, + "learning_rate": 3.5645728729036446e-05, + "loss": 2.6552, + "step": 168100 + }, + { + "epoch": 4.31, + "learning_rate": 3.563718960275985e-05, + "loss": 2.9929, + "step": 168200 + }, + { + "epoch": 4.31, + "learning_rate": 3.562865047648325e-05, + "loss": 2.8072, + "step": 168300 + }, + { + "epoch": 4.31, + "learning_rate": 3.5620111350206644e-05, + "loss": 2.9567, + "step": 168400 + }, + { + "epoch": 4.32, + "learning_rate": 3.561157222393005e-05, + "loss": 2.8903, + "step": 168500 + }, + { + "epoch": 4.32, + "learning_rate": 3.560303309765345e-05, + "loss": 2.8801, + "step": 168600 + }, + { + "epoch": 4.32, + "learning_rate": 3.559449397137685e-05, + "loss": 2.749, + "step": 168700 + }, + { + "epoch": 4.32, + "learning_rate": 3.558595484510025e-05, + "loss": 2.8069, + "step": 168800 + }, + { + "epoch": 4.33, + "learning_rate": 3.557741571882365e-05, + "loss": 2.8033, + "step": 168900 + }, + { + "epoch": 4.33, + "learning_rate": 3.5568876592547054e-05, + "loss": 2.6476, + "step": 169000 + }, + { + "epoch": 4.33, + "learning_rate": 3.5560337466270455e-05, + "loss": 3.0062, + "step": 169100 + }, + { + "epoch": 4.33, + "learning_rate": 3.555179833999385e-05, + "loss": 2.9767, + "step": 169200 + }, + { + "epoch": 4.34, + "learning_rate": 3.554325921371726e-05, + "loss": 2.8588, + "step": 169300 + }, + { + "epoch": 4.34, + "learning_rate": 3.553472008744065e-05, + "loss": 2.8502, + "step": 169400 + }, + { + "epoch": 4.34, + "learning_rate": 3.5526180961164055e-05, + "loss": 3.108, + "step": 169500 + }, + { + "epoch": 4.34, + "learning_rate": 3.5517641834887456e-05, + "loss": 2.7552, + "step": 169600 + }, + { + "epoch": 4.35, + "learning_rate": 3.550910270861086e-05, + "loss": 3.0094, + "step": 169700 + }, + { + "epoch": 4.35, + "learning_rate": 3.550056358233426e-05, + "loss": 2.8699, + "step": 169800 + }, + { + "epoch": 4.35, + "learning_rate": 3.549202445605766e-05, + "loss": 2.8333, + "step": 169900 + }, + { + "epoch": 4.35, + "learning_rate": 3.5483485329781055e-05, + "loss": 2.8573, + "step": 170000 + }, + { + "epoch": 4.36, + "learning_rate": 3.5474946203504464e-05, + "loss": 2.8919, + "step": 170100 + }, + { + "epoch": 4.36, + "learning_rate": 3.546640707722786e-05, + "loss": 3.0407, + "step": 170200 + }, + { + "epoch": 4.36, + "learning_rate": 3.545786795095126e-05, + "loss": 2.8344, + "step": 170300 + }, + { + "epoch": 4.37, + "learning_rate": 3.544932882467466e-05, + "loss": 2.8432, + "step": 170400 + }, + { + "epoch": 4.37, + "learning_rate": 3.544078969839806e-05, + "loss": 2.646, + "step": 170500 + }, + { + "epoch": 4.37, + "learning_rate": 3.5432250572121465e-05, + "loss": 2.7221, + "step": 170600 + }, + { + "epoch": 4.37, + "learning_rate": 3.5423711445844866e-05, + "loss": 2.8708, + "step": 170700 + }, + { + "epoch": 4.38, + "learning_rate": 3.541517231956826e-05, + "loss": 2.8989, + "step": 170800 + }, + { + "epoch": 4.38, + "learning_rate": 3.540663319329167e-05, + "loss": 2.9317, + "step": 170900 + }, + { + "epoch": 4.38, + "learning_rate": 3.5398094067015064e-05, + "loss": 2.7789, + "step": 171000 + }, + { + "epoch": 4.38, + "learning_rate": 3.5389554940738466e-05, + "loss": 2.9044, + "step": 171100 + }, + { + "epoch": 4.39, + "learning_rate": 3.538101581446187e-05, + "loss": 2.8208, + "step": 171200 + }, + { + "epoch": 4.39, + "learning_rate": 3.537247668818527e-05, + "loss": 2.7788, + "step": 171300 + }, + { + "epoch": 4.39, + "learning_rate": 3.5363937561908664e-05, + "loss": 2.7944, + "step": 171400 + }, + { + "epoch": 4.39, + "learning_rate": 3.535539843563207e-05, + "loss": 2.837, + "step": 171500 + }, + { + "epoch": 4.4, + "learning_rate": 3.534685930935547e-05, + "loss": 2.9127, + "step": 171600 + }, + { + "epoch": 4.4, + "learning_rate": 3.533832018307887e-05, + "loss": 2.847, + "step": 171700 + }, + { + "epoch": 4.4, + "learning_rate": 3.532978105680227e-05, + "loss": 2.853, + "step": 171800 + }, + { + "epoch": 4.4, + "learning_rate": 3.532124193052567e-05, + "loss": 2.6868, + "step": 171900 + }, + { + "epoch": 4.41, + "learning_rate": 3.531270280424907e-05, + "loss": 3.0012, + "step": 172000 + }, + { + "epoch": 4.41, + "learning_rate": 3.5304163677972474e-05, + "loss": 3.0031, + "step": 172100 + }, + { + "epoch": 4.41, + "learning_rate": 3.529562455169587e-05, + "loss": 2.8709, + "step": 172200 + }, + { + "epoch": 4.41, + "learning_rate": 3.528708542541928e-05, + "loss": 2.707, + "step": 172300 + }, + { + "epoch": 4.42, + "learning_rate": 3.527854629914267e-05, + "loss": 2.6456, + "step": 172400 + }, + { + "epoch": 4.42, + "learning_rate": 3.5270007172866074e-05, + "loss": 2.6917, + "step": 172500 + }, + { + "epoch": 4.42, + "learning_rate": 3.5261468046589475e-05, + "loss": 2.7597, + "step": 172600 + }, + { + "epoch": 4.42, + "learning_rate": 3.525292892031288e-05, + "loss": 2.5973, + "step": 172700 + }, + { + "epoch": 4.43, + "learning_rate": 3.524438979403628e-05, + "loss": 2.7756, + "step": 172800 + }, + { + "epoch": 4.43, + "learning_rate": 3.523585066775968e-05, + "loss": 2.8136, + "step": 172900 + }, + { + "epoch": 4.43, + "learning_rate": 3.5227311541483075e-05, + "loss": 2.7833, + "step": 173000 + }, + { + "epoch": 4.43, + "learning_rate": 3.521877241520648e-05, + "loss": 2.7723, + "step": 173100 + }, + { + "epoch": 4.44, + "learning_rate": 3.521023328892988e-05, + "loss": 2.8283, + "step": 173200 + }, + { + "epoch": 4.44, + "learning_rate": 3.520169416265328e-05, + "loss": 3.0486, + "step": 173300 + }, + { + "epoch": 4.44, + "learning_rate": 3.519315503637668e-05, + "loss": 2.9591, + "step": 173400 + }, + { + "epoch": 4.44, + "learning_rate": 3.518461591010008e-05, + "loss": 2.7472, + "step": 173500 + }, + { + "epoch": 4.45, + "learning_rate": 3.517607678382348e-05, + "loss": 2.8585, + "step": 173600 + }, + { + "epoch": 4.45, + "learning_rate": 3.5167537657546886e-05, + "loss": 2.9055, + "step": 173700 + }, + { + "epoch": 4.45, + "learning_rate": 3.515899853127028e-05, + "loss": 2.8869, + "step": 173800 + }, + { + "epoch": 4.45, + "learning_rate": 3.515045940499368e-05, + "loss": 2.9449, + "step": 173900 + }, + { + "epoch": 4.46, + "learning_rate": 3.5141920278717084e-05, + "loss": 2.7024, + "step": 174000 + }, + { + "epoch": 4.46, + "learning_rate": 3.5133381152440485e-05, + "loss": 2.931, + "step": 174100 + }, + { + "epoch": 4.46, + "learning_rate": 3.512484202616389e-05, + "loss": 2.8525, + "step": 174200 + }, + { + "epoch": 4.47, + "learning_rate": 3.511630289988729e-05, + "loss": 2.9064, + "step": 174300 + }, + { + "epoch": 4.47, + "learning_rate": 3.510776377361068e-05, + "loss": 2.6678, + "step": 174400 + }, + { + "epoch": 4.47, + "learning_rate": 3.509922464733409e-05, + "loss": 2.9454, + "step": 174500 + }, + { + "epoch": 4.47, + "learning_rate": 3.5090685521057486e-05, + "loss": 2.7788, + "step": 174600 + }, + { + "epoch": 4.48, + "learning_rate": 3.508214639478089e-05, + "loss": 2.949, + "step": 174700 + }, + { + "epoch": 4.48, + "learning_rate": 3.507360726850429e-05, + "loss": 2.8295, + "step": 174800 + }, + { + "epoch": 4.48, + "learning_rate": 3.506506814222769e-05, + "loss": 2.8091, + "step": 174900 + }, + { + "epoch": 4.48, + "learning_rate": 3.505652901595109e-05, + "loss": 2.8725, + "step": 175000 + }, + { + "epoch": 4.49, + "learning_rate": 3.5047989889674494e-05, + "loss": 2.9462, + "step": 175100 + }, + { + "epoch": 4.49, + "learning_rate": 3.503945076339789e-05, + "loss": 2.9779, + "step": 175200 + }, + { + "epoch": 4.49, + "learning_rate": 3.50309116371213e-05, + "loss": 2.9337, + "step": 175300 + }, + { + "epoch": 4.49, + "learning_rate": 3.502237251084469e-05, + "loss": 2.7945, + "step": 175400 + }, + { + "epoch": 4.5, + "learning_rate": 3.501383338456809e-05, + "loss": 2.8266, + "step": 175500 + }, + { + "epoch": 4.5, + "learning_rate": 3.5005294258291495e-05, + "loss": 2.7534, + "step": 175600 + }, + { + "epoch": 4.5, + "learning_rate": 3.499675513201489e-05, + "loss": 2.9093, + "step": 175700 + }, + { + "epoch": 4.5, + "learning_rate": 3.49882160057383e-05, + "loss": 2.8192, + "step": 175800 + }, + { + "epoch": 4.51, + "learning_rate": 3.497967687946169e-05, + "loss": 2.7516, + "step": 175900 + }, + { + "epoch": 4.51, + "learning_rate": 3.4971137753185094e-05, + "loss": 2.8898, + "step": 176000 + }, + { + "epoch": 4.51, + "learning_rate": 3.4962598626908496e-05, + "loss": 2.9933, + "step": 176100 + }, + { + "epoch": 4.51, + "learning_rate": 3.49540595006319e-05, + "loss": 3.0046, + "step": 176200 + }, + { + "epoch": 4.52, + "learning_rate": 3.494552037435529e-05, + "loss": 2.8737, + "step": 176300 + }, + { + "epoch": 4.52, + "learning_rate": 3.49369812480787e-05, + "loss": 2.9585, + "step": 176400 + }, + { + "epoch": 4.52, + "learning_rate": 3.4928442121802095e-05, + "loss": 2.7224, + "step": 176500 + }, + { + "epoch": 4.52, + "learning_rate": 3.49199029955255e-05, + "loss": 2.9317, + "step": 176600 + }, + { + "epoch": 4.53, + "learning_rate": 3.49113638692489e-05, + "loss": 2.896, + "step": 176700 + }, + { + "epoch": 4.53, + "learning_rate": 3.49028247429723e-05, + "loss": 2.7501, + "step": 176800 + }, + { + "epoch": 4.53, + "learning_rate": 3.48942856166957e-05, + "loss": 2.8642, + "step": 176900 + }, + { + "epoch": 4.53, + "learning_rate": 3.48857464904191e-05, + "loss": 3.2009, + "step": 177000 + }, + { + "epoch": 4.54, + "learning_rate": 3.48772073641425e-05, + "loss": 2.8458, + "step": 177100 + }, + { + "epoch": 4.54, + "learning_rate": 3.4868668237865906e-05, + "loss": 2.7384, + "step": 177200 + }, + { + "epoch": 4.54, + "learning_rate": 3.48601291115893e-05, + "loss": 2.9411, + "step": 177300 + }, + { + "epoch": 4.54, + "learning_rate": 3.48515899853127e-05, + "loss": 2.8195, + "step": 177400 + }, + { + "epoch": 4.55, + "learning_rate": 3.4843050859036104e-05, + "loss": 2.8032, + "step": 177500 + }, + { + "epoch": 4.55, + "learning_rate": 3.4834511732759505e-05, + "loss": 2.8678, + "step": 177600 + }, + { + "epoch": 4.55, + "learning_rate": 3.482597260648291e-05, + "loss": 2.7897, + "step": 177700 + }, + { + "epoch": 4.55, + "learning_rate": 3.481743348020631e-05, + "loss": 2.8494, + "step": 177800 + }, + { + "epoch": 4.56, + "learning_rate": 3.48088943539297e-05, + "loss": 2.9871, + "step": 177900 + }, + { + "epoch": 4.56, + "learning_rate": 3.480035522765311e-05, + "loss": 2.8696, + "step": 178000 + }, + { + "epoch": 4.56, + "learning_rate": 3.4791816101376506e-05, + "loss": 2.8582, + "step": 178100 + }, + { + "epoch": 4.57, + "learning_rate": 3.478327697509991e-05, + "loss": 2.8457, + "step": 178200 + }, + { + "epoch": 4.57, + "learning_rate": 3.477473784882331e-05, + "loss": 2.8258, + "step": 178300 + }, + { + "epoch": 4.57, + "learning_rate": 3.476619872254671e-05, + "loss": 2.6707, + "step": 178400 + }, + { + "epoch": 4.57, + "learning_rate": 3.4757659596270106e-05, + "loss": 2.8959, + "step": 178500 + }, + { + "epoch": 4.58, + "learning_rate": 3.4749120469993514e-05, + "loss": 2.7047, + "step": 178600 + }, + { + "epoch": 4.58, + "learning_rate": 3.474058134371691e-05, + "loss": 2.8022, + "step": 178700 + }, + { + "epoch": 4.58, + "learning_rate": 3.473204221744031e-05, + "loss": 2.9514, + "step": 178800 + }, + { + "epoch": 4.58, + "learning_rate": 3.472350309116371e-05, + "loss": 2.7032, + "step": 178900 + }, + { + "epoch": 4.59, + "learning_rate": 3.4714963964887114e-05, + "loss": 2.7697, + "step": 179000 + }, + { + "epoch": 4.59, + "learning_rate": 3.4706424838610515e-05, + "loss": 2.6671, + "step": 179100 + }, + { + "epoch": 4.59, + "learning_rate": 3.4697885712333917e-05, + "loss": 2.9532, + "step": 179200 + }, + { + "epoch": 4.59, + "learning_rate": 3.468934658605731e-05, + "loss": 2.895, + "step": 179300 + }, + { + "epoch": 4.6, + "learning_rate": 3.468080745978072e-05, + "loss": 2.6902, + "step": 179400 + }, + { + "epoch": 4.6, + "learning_rate": 3.4672268333504114e-05, + "loss": 2.9458, + "step": 179500 + }, + { + "epoch": 4.6, + "learning_rate": 3.4663729207227516e-05, + "loss": 2.8755, + "step": 179600 + }, + { + "epoch": 4.6, + "learning_rate": 3.465519008095092e-05, + "loss": 2.7573, + "step": 179700 + }, + { + "epoch": 4.61, + "learning_rate": 3.464665095467432e-05, + "loss": 2.971, + "step": 179800 + }, + { + "epoch": 4.61, + "learning_rate": 3.463811182839772e-05, + "loss": 2.7879, + "step": 179900 + }, + { + "epoch": 4.61, + "learning_rate": 3.462957270212112e-05, + "loss": 2.742, + "step": 180000 + }, + { + "epoch": 4.61, + "learning_rate": 3.462103357584452e-05, + "loss": 2.8963, + "step": 180100 + }, + { + "epoch": 4.62, + "learning_rate": 3.4612494449567925e-05, + "loss": 2.8663, + "step": 180200 + }, + { + "epoch": 4.62, + "learning_rate": 3.460395532329132e-05, + "loss": 2.8793, + "step": 180300 + }, + { + "epoch": 4.62, + "learning_rate": 3.459541619701472e-05, + "loss": 2.8008, + "step": 180400 + }, + { + "epoch": 4.62, + "learning_rate": 3.458687707073812e-05, + "loss": 2.766, + "step": 180500 + }, + { + "epoch": 4.63, + "learning_rate": 3.4578337944461525e-05, + "loss": 2.9717, + "step": 180600 + }, + { + "epoch": 4.63, + "learning_rate": 3.4569798818184926e-05, + "loss": 2.7027, + "step": 180700 + }, + { + "epoch": 4.63, + "learning_rate": 3.456125969190833e-05, + "loss": 2.6709, + "step": 180800 + }, + { + "epoch": 4.63, + "learning_rate": 3.455272056563172e-05, + "loss": 2.7249, + "step": 180900 + }, + { + "epoch": 4.64, + "learning_rate": 3.454418143935513e-05, + "loss": 2.7697, + "step": 181000 + }, + { + "epoch": 4.64, + "learning_rate": 3.4535642313078526e-05, + "loss": 2.771, + "step": 181100 + }, + { + "epoch": 4.64, + "learning_rate": 3.452710318680193e-05, + "loss": 2.6931, + "step": 181200 + }, + { + "epoch": 4.64, + "learning_rate": 3.451856406052533e-05, + "loss": 2.8809, + "step": 181300 + }, + { + "epoch": 4.65, + "learning_rate": 3.451002493424873e-05, + "loss": 2.9175, + "step": 181400 + }, + { + "epoch": 4.65, + "learning_rate": 3.4501485807972125e-05, + "loss": 2.838, + "step": 181500 + }, + { + "epoch": 4.65, + "learning_rate": 3.4492946681695533e-05, + "loss": 2.7583, + "step": 181600 + }, + { + "epoch": 4.65, + "learning_rate": 3.448440755541893e-05, + "loss": 2.7249, + "step": 181700 + }, + { + "epoch": 4.66, + "learning_rate": 3.447586842914233e-05, + "loss": 2.9048, + "step": 181800 + }, + { + "epoch": 4.66, + "learning_rate": 3.446732930286573e-05, + "loss": 2.6823, + "step": 181900 + }, + { + "epoch": 4.66, + "learning_rate": 3.445879017658913e-05, + "loss": 2.8402, + "step": 182000 + }, + { + "epoch": 4.66, + "learning_rate": 3.4450251050312534e-05, + "loss": 2.8399, + "step": 182100 + }, + { + "epoch": 4.67, + "learning_rate": 3.4441711924035936e-05, + "loss": 2.8171, + "step": 182200 + }, + { + "epoch": 4.67, + "learning_rate": 3.443317279775933e-05, + "loss": 2.7929, + "step": 182300 + }, + { + "epoch": 4.67, + "learning_rate": 3.442463367148274e-05, + "loss": 2.8139, + "step": 182400 + }, + { + "epoch": 4.68, + "learning_rate": 3.4416094545206134e-05, + "loss": 2.6094, + "step": 182500 + }, + { + "epoch": 4.68, + "learning_rate": 3.4407555418929535e-05, + "loss": 2.8512, + "step": 182600 + }, + { + "epoch": 4.68, + "learning_rate": 3.439901629265294e-05, + "loss": 2.9651, + "step": 182700 + }, + { + "epoch": 4.68, + "learning_rate": 3.439047716637634e-05, + "loss": 2.8471, + "step": 182800 + }, + { + "epoch": 4.69, + "learning_rate": 3.438193804009974e-05, + "loss": 2.7212, + "step": 182900 + }, + { + "epoch": 4.69, + "learning_rate": 3.437339891382314e-05, + "loss": 2.7698, + "step": 183000 + }, + { + "epoch": 4.69, + "learning_rate": 3.4364859787546536e-05, + "loss": 2.7476, + "step": 183100 + }, + { + "epoch": 4.69, + "learning_rate": 3.4356320661269945e-05, + "loss": 3.0972, + "step": 183200 + }, + { + "epoch": 4.7, + "learning_rate": 3.434778153499334e-05, + "loss": 2.8557, + "step": 183300 + }, + { + "epoch": 4.7, + "learning_rate": 3.433924240871674e-05, + "loss": 2.9168, + "step": 183400 + }, + { + "epoch": 4.7, + "learning_rate": 3.433070328244014e-05, + "loss": 2.893, + "step": 183500 + }, + { + "epoch": 4.7, + "learning_rate": 3.4322164156163544e-05, + "loss": 2.8344, + "step": 183600 + }, + { + "epoch": 4.71, + "learning_rate": 3.431362502988694e-05, + "loss": 2.7075, + "step": 183700 + }, + { + "epoch": 4.71, + "learning_rate": 3.430508590361035e-05, + "loss": 2.7563, + "step": 183800 + }, + { + "epoch": 4.71, + "learning_rate": 3.429654677733374e-05, + "loss": 2.7989, + "step": 183900 + }, + { + "epoch": 4.71, + "learning_rate": 3.4288007651057143e-05, + "loss": 2.8689, + "step": 184000 + }, + { + "epoch": 4.72, + "learning_rate": 3.4279468524780545e-05, + "loss": 2.7793, + "step": 184100 + }, + { + "epoch": 4.72, + "learning_rate": 3.4270929398503947e-05, + "loss": 2.935, + "step": 184200 + }, + { + "epoch": 4.72, + "learning_rate": 3.426239027222735e-05, + "loss": 2.8373, + "step": 184300 + }, + { + "epoch": 4.72, + "learning_rate": 3.425385114595075e-05, + "loss": 2.7941, + "step": 184400 + }, + { + "epoch": 4.73, + "learning_rate": 3.4245312019674144e-05, + "loss": 2.8139, + "step": 184500 + }, + { + "epoch": 4.73, + "learning_rate": 3.423677289339755e-05, + "loss": 2.9945, + "step": 184600 + }, + { + "epoch": 4.73, + "learning_rate": 3.422823376712095e-05, + "loss": 2.8065, + "step": 184700 + }, + { + "epoch": 4.73, + "learning_rate": 3.421969464084435e-05, + "loss": 2.7373, + "step": 184800 + }, + { + "epoch": 4.74, + "learning_rate": 3.421115551456775e-05, + "loss": 2.9016, + "step": 184900 + }, + { + "epoch": 4.74, + "learning_rate": 3.420261638829115e-05, + "loss": 2.7924, + "step": 185000 + }, + { + "epoch": 4.74, + "learning_rate": 3.4194077262014554e-05, + "loss": 2.6757, + "step": 185100 + }, + { + "epoch": 4.74, + "learning_rate": 3.4185538135737955e-05, + "loss": 2.98, + "step": 185200 + }, + { + "epoch": 4.75, + "learning_rate": 3.417699900946135e-05, + "loss": 2.6377, + "step": 185300 + }, + { + "epoch": 4.75, + "learning_rate": 3.416845988318476e-05, + "loss": 2.7265, + "step": 185400 + }, + { + "epoch": 4.75, + "learning_rate": 3.415992075690815e-05, + "loss": 3.0404, + "step": 185500 + }, + { + "epoch": 4.75, + "learning_rate": 3.4151381630631555e-05, + "loss": 2.854, + "step": 185600 + }, + { + "epoch": 4.76, + "learning_rate": 3.4142842504354956e-05, + "loss": 2.8406, + "step": 185700 + }, + { + "epoch": 4.76, + "learning_rate": 3.413430337807836e-05, + "loss": 2.8111, + "step": 185800 + }, + { + "epoch": 4.76, + "learning_rate": 3.412576425180176e-05, + "loss": 2.713, + "step": 185900 + }, + { + "epoch": 4.76, + "learning_rate": 3.411722512552516e-05, + "loss": 2.9278, + "step": 186000 + }, + { + "epoch": 4.77, + "learning_rate": 3.4108685999248556e-05, + "loss": 2.7635, + "step": 186100 + }, + { + "epoch": 4.77, + "learning_rate": 3.4100146872971964e-05, + "loss": 2.7555, + "step": 186200 + }, + { + "epoch": 4.77, + "learning_rate": 3.409160774669536e-05, + "loss": 2.7749, + "step": 186300 + }, + { + "epoch": 4.78, + "learning_rate": 3.408306862041876e-05, + "loss": 2.7755, + "step": 186400 + }, + { + "epoch": 4.78, + "learning_rate": 3.407452949414216e-05, + "loss": 2.8913, + "step": 186500 + }, + { + "epoch": 4.78, + "learning_rate": 3.4065990367865563e-05, + "loss": 2.9017, + "step": 186600 + }, + { + "epoch": 4.78, + "learning_rate": 3.405745124158896e-05, + "loss": 2.6779, + "step": 186700 + }, + { + "epoch": 4.79, + "learning_rate": 3.4048912115312367e-05, + "loss": 2.8349, + "step": 186800 + }, + { + "epoch": 4.79, + "learning_rate": 3.404037298903576e-05, + "loss": 3.0744, + "step": 186900 + }, + { + "epoch": 4.79, + "learning_rate": 3.403183386275916e-05, + "loss": 2.8017, + "step": 187000 + }, + { + "epoch": 4.79, + "learning_rate": 3.4023294736482564e-05, + "loss": 2.6702, + "step": 187100 + }, + { + "epoch": 4.8, + "learning_rate": 3.4014755610205966e-05, + "loss": 2.7281, + "step": 187200 + }, + { + "epoch": 4.8, + "learning_rate": 3.400621648392937e-05, + "loss": 2.7488, + "step": 187300 + }, + { + "epoch": 4.8, + "learning_rate": 3.399767735765277e-05, + "loss": 2.7947, + "step": 187400 + }, + { + "epoch": 4.8, + "learning_rate": 3.3989138231376164e-05, + "loss": 2.8185, + "step": 187500 + }, + { + "epoch": 4.81, + "learning_rate": 3.398059910509957e-05, + "loss": 2.8457, + "step": 187600 + }, + { + "epoch": 4.81, + "learning_rate": 3.397205997882297e-05, + "loss": 2.9416, + "step": 187700 + }, + { + "epoch": 4.81, + "learning_rate": 3.396352085254637e-05, + "loss": 2.7752, + "step": 187800 + }, + { + "epoch": 4.81, + "learning_rate": 3.395498172626977e-05, + "loss": 2.6326, + "step": 187900 + }, + { + "epoch": 4.82, + "learning_rate": 3.394644259999317e-05, + "loss": 2.7806, + "step": 188000 + }, + { + "epoch": 4.82, + "learning_rate": 3.393790347371657e-05, + "loss": 2.8613, + "step": 188100 + }, + { + "epoch": 4.82, + "learning_rate": 3.3929364347439975e-05, + "loss": 2.9939, + "step": 188200 + }, + { + "epoch": 4.82, + "learning_rate": 3.392082522116337e-05, + "loss": 2.9626, + "step": 188300 + }, + { + "epoch": 4.83, + "learning_rate": 3.391228609488678e-05, + "loss": 2.8134, + "step": 188400 + }, + { + "epoch": 4.83, + "learning_rate": 3.390374696861017e-05, + "loss": 2.6093, + "step": 188500 + }, + { + "epoch": 4.83, + "learning_rate": 3.3895207842333574e-05, + "loss": 2.837, + "step": 188600 + }, + { + "epoch": 4.83, + "learning_rate": 3.3886668716056976e-05, + "loss": 2.8663, + "step": 188700 + }, + { + "epoch": 4.84, + "learning_rate": 3.387812958978038e-05, + "loss": 2.9026, + "step": 188800 + }, + { + "epoch": 4.84, + "learning_rate": 3.386959046350377e-05, + "loss": 2.7949, + "step": 188900 + }, + { + "epoch": 4.84, + "learning_rate": 3.386105133722718e-05, + "loss": 2.7179, + "step": 189000 + }, + { + "epoch": 4.84, + "learning_rate": 3.3852512210950575e-05, + "loss": 2.6774, + "step": 189100 + }, + { + "epoch": 4.85, + "learning_rate": 3.384397308467398e-05, + "loss": 2.7785, + "step": 189200 + }, + { + "epoch": 4.85, + "learning_rate": 3.383543395839738e-05, + "loss": 2.9502, + "step": 189300 + }, + { + "epoch": 4.85, + "learning_rate": 3.382689483212078e-05, + "loss": 2.7294, + "step": 189400 + }, + { + "epoch": 4.85, + "learning_rate": 3.381835570584418e-05, + "loss": 2.7518, + "step": 189500 + }, + { + "epoch": 4.86, + "learning_rate": 3.380981657956758e-05, + "loss": 2.6775, + "step": 189600 + }, + { + "epoch": 4.86, + "learning_rate": 3.380127745329098e-05, + "loss": 2.7685, + "step": 189700 + }, + { + "epoch": 4.86, + "learning_rate": 3.3792738327014386e-05, + "loss": 2.6363, + "step": 189800 + }, + { + "epoch": 4.86, + "learning_rate": 3.378419920073778e-05, + "loss": 2.8164, + "step": 189900 + }, + { + "epoch": 4.87, + "learning_rate": 3.377566007446118e-05, + "loss": 2.8068, + "step": 190000 + }, + { + "epoch": 4.87, + "learning_rate": 3.3767120948184584e-05, + "loss": 2.8893, + "step": 190100 + }, + { + "epoch": 4.87, + "learning_rate": 3.3758581821907985e-05, + "loss": 3.1823, + "step": 190200 + }, + { + "epoch": 4.87, + "learning_rate": 3.375004269563139e-05, + "loss": 2.8149, + "step": 190300 + }, + { + "epoch": 4.88, + "learning_rate": 3.374150356935479e-05, + "loss": 2.8773, + "step": 190400 + }, + { + "epoch": 4.88, + "learning_rate": 3.373296444307818e-05, + "loss": 2.8124, + "step": 190500 + }, + { + "epoch": 4.88, + "learning_rate": 3.372442531680159e-05, + "loss": 2.6555, + "step": 190600 + }, + { + "epoch": 4.89, + "learning_rate": 3.3715886190524986e-05, + "loss": 2.5526, + "step": 190700 + }, + { + "epoch": 4.89, + "learning_rate": 3.370734706424839e-05, + "loss": 2.6868, + "step": 190800 + }, + { + "epoch": 4.89, + "learning_rate": 3.369880793797179e-05, + "loss": 2.7514, + "step": 190900 + }, + { + "epoch": 4.89, + "learning_rate": 3.369026881169519e-05, + "loss": 2.8321, + "step": 191000 + }, + { + "epoch": 4.9, + "learning_rate": 3.368172968541859e-05, + "loss": 2.6201, + "step": 191100 + }, + { + "epoch": 4.9, + "learning_rate": 3.3673190559141994e-05, + "loss": 2.8815, + "step": 191200 + }, + { + "epoch": 4.9, + "learning_rate": 3.366465143286539e-05, + "loss": 2.8936, + "step": 191300 + }, + { + "epoch": 4.9, + "learning_rate": 3.36561123065888e-05, + "loss": 3.011, + "step": 191400 + }, + { + "epoch": 4.91, + "learning_rate": 3.364757318031219e-05, + "loss": 2.8954, + "step": 191500 + }, + { + "epoch": 4.91, + "learning_rate": 3.363903405403559e-05, + "loss": 2.7834, + "step": 191600 + }, + { + "epoch": 4.91, + "learning_rate": 3.3630494927758995e-05, + "loss": 2.6221, + "step": 191700 + }, + { + "epoch": 4.91, + "learning_rate": 3.3621955801482396e-05, + "loss": 2.842, + "step": 191800 + }, + { + "epoch": 4.92, + "learning_rate": 3.361341667520579e-05, + "loss": 2.7935, + "step": 191900 + }, + { + "epoch": 4.92, + "learning_rate": 3.36048775489292e-05, + "loss": 2.9066, + "step": 192000 + }, + { + "epoch": 4.92, + "learning_rate": 3.3596338422652594e-05, + "loss": 2.8521, + "step": 192100 + }, + { + "epoch": 4.92, + "learning_rate": 3.3587799296375996e-05, + "loss": 2.7344, + "step": 192200 + }, + { + "epoch": 4.93, + "learning_rate": 3.35792601700994e-05, + "loss": 2.7969, + "step": 192300 + }, + { + "epoch": 4.93, + "learning_rate": 3.35707210438228e-05, + "loss": 2.8141, + "step": 192400 + }, + { + "epoch": 4.93, + "learning_rate": 3.35621819175462e-05, + "loss": 2.8277, + "step": 192500 + }, + { + "epoch": 4.93, + "learning_rate": 3.35536427912696e-05, + "loss": 3.0706, + "step": 192600 + }, + { + "epoch": 4.94, + "learning_rate": 3.3545103664993e-05, + "loss": 2.6699, + "step": 192700 + }, + { + "epoch": 4.94, + "learning_rate": 3.3536564538716405e-05, + "loss": 2.7326, + "step": 192800 + }, + { + "epoch": 4.94, + "learning_rate": 3.35280254124398e-05, + "loss": 2.9995, + "step": 192900 + }, + { + "epoch": 4.94, + "learning_rate": 3.35194862861632e-05, + "loss": 2.6497, + "step": 193000 + }, + { + "epoch": 4.95, + "learning_rate": 3.35109471598866e-05, + "loss": 2.7172, + "step": 193100 + }, + { + "epoch": 4.95, + "learning_rate": 3.350240803361e-05, + "loss": 2.6651, + "step": 193200 + }, + { + "epoch": 4.95, + "learning_rate": 3.3493868907333406e-05, + "loss": 2.6778, + "step": 193300 + }, + { + "epoch": 4.95, + "learning_rate": 3.34853297810568e-05, + "loss": 2.9511, + "step": 193400 + }, + { + "epoch": 4.96, + "learning_rate": 3.34767906547802e-05, + "loss": 2.8123, + "step": 193500 + }, + { + "epoch": 4.96, + "learning_rate": 3.3468251528503604e-05, + "loss": 2.7655, + "step": 193600 + }, + { + "epoch": 4.96, + "learning_rate": 3.3459712402227006e-05, + "loss": 2.7086, + "step": 193700 + }, + { + "epoch": 4.96, + "learning_rate": 3.345117327595041e-05, + "loss": 3.0242, + "step": 193800 + }, + { + "epoch": 4.97, + "learning_rate": 3.344263414967381e-05, + "loss": 2.7939, + "step": 193900 + }, + { + "epoch": 4.97, + "learning_rate": 3.3434095023397203e-05, + "loss": 2.7467, + "step": 194000 + }, + { + "epoch": 4.97, + "learning_rate": 3.342555589712061e-05, + "loss": 2.789, + "step": 194100 + }, + { + "epoch": 4.97, + "learning_rate": 3.3417016770844007e-05, + "loss": 2.9062, + "step": 194200 + }, + { + "epoch": 4.98, + "learning_rate": 3.340847764456741e-05, + "loss": 2.9673, + "step": 194300 + }, + { + "epoch": 4.98, + "learning_rate": 3.339993851829081e-05, + "loss": 2.7477, + "step": 194400 + }, + { + "epoch": 4.98, + "learning_rate": 3.339139939201421e-05, + "loss": 2.7192, + "step": 194500 + }, + { + "epoch": 4.99, + "learning_rate": 3.3382860265737606e-05, + "loss": 2.88, + "step": 194600 + }, + { + "epoch": 4.99, + "learning_rate": 3.3374321139461014e-05, + "loss": 2.7896, + "step": 194700 + }, + { + "epoch": 4.99, + "learning_rate": 3.336578201318441e-05, + "loss": 2.6271, + "step": 194800 + }, + { + "epoch": 4.99, + "learning_rate": 3.335724288690781e-05, + "loss": 2.864, + "step": 194900 + }, + { + "epoch": 5.0, + "learning_rate": 3.334870376063121e-05, + "loss": 2.8348, + "step": 195000 + }, + { + "epoch": 5.0, + "learning_rate": 3.3340164634354614e-05, + "loss": 2.8911, + "step": 195100 + }, + { + "epoch": 5.0, + "learning_rate": 3.3331625508078015e-05, + "loss": 2.7296, + "step": 195200 + }, + { + "epoch": 5.0, + "learning_rate": 3.332308638180142e-05, + "loss": 2.6448, + "step": 195300 + }, + { + "epoch": 5.01, + "learning_rate": 3.331454725552481e-05, + "loss": 2.948, + "step": 195400 + }, + { + "epoch": 5.01, + "learning_rate": 3.330600812924822e-05, + "loss": 2.8449, + "step": 195500 + }, + { + "epoch": 5.01, + "learning_rate": 3.3297469002971615e-05, + "loss": 2.5992, + "step": 195600 + }, + { + "epoch": 5.01, + "learning_rate": 3.3288929876695016e-05, + "loss": 2.7582, + "step": 195700 + }, + { + "epoch": 5.02, + "learning_rate": 3.328039075041842e-05, + "loss": 2.842, + "step": 195800 + }, + { + "epoch": 5.02, + "learning_rate": 3.327185162414182e-05, + "loss": 2.8697, + "step": 195900 + }, + { + "epoch": 5.02, + "learning_rate": 3.326331249786522e-05, + "loss": 2.8471, + "step": 196000 + }, + { + "epoch": 5.02, + "learning_rate": 3.325477337158862e-05, + "loss": 2.8698, + "step": 196100 + }, + { + "epoch": 5.03, + "learning_rate": 3.324623424531202e-05, + "loss": 2.6487, + "step": 196200 + }, + { + "epoch": 5.03, + "learning_rate": 3.3237695119035425e-05, + "loss": 2.7105, + "step": 196300 + }, + { + "epoch": 5.03, + "learning_rate": 3.322915599275882e-05, + "loss": 2.8269, + "step": 196400 + }, + { + "epoch": 5.03, + "learning_rate": 3.322061686648222e-05, + "loss": 2.7231, + "step": 196500 + }, + { + "epoch": 5.04, + "learning_rate": 3.321207774020562e-05, + "loss": 2.7965, + "step": 196600 + }, + { + "epoch": 5.04, + "learning_rate": 3.3203538613929025e-05, + "loss": 2.7904, + "step": 196700 + }, + { + "epoch": 5.04, + "learning_rate": 3.319499948765242e-05, + "loss": 2.9175, + "step": 196800 + }, + { + "epoch": 5.04, + "learning_rate": 3.318646036137583e-05, + "loss": 2.6893, + "step": 196900 + }, + { + "epoch": 5.05, + "learning_rate": 3.317792123509922e-05, + "loss": 2.6112, + "step": 197000 + }, + { + "epoch": 5.05, + "learning_rate": 3.3169382108822624e-05, + "loss": 2.7459, + "step": 197100 + }, + { + "epoch": 5.05, + "learning_rate": 3.3160842982546026e-05, + "loss": 2.8013, + "step": 197200 + }, + { + "epoch": 5.05, + "learning_rate": 3.315230385626943e-05, + "loss": 2.6266, + "step": 197300 + }, + { + "epoch": 5.06, + "learning_rate": 3.314376472999283e-05, + "loss": 3.0153, + "step": 197400 + }, + { + "epoch": 5.06, + "learning_rate": 3.313522560371623e-05, + "loss": 2.8443, + "step": 197500 + }, + { + "epoch": 5.06, + "learning_rate": 3.3126686477439625e-05, + "loss": 2.9788, + "step": 197600 + }, + { + "epoch": 5.06, + "learning_rate": 3.3118147351163034e-05, + "loss": 2.8516, + "step": 197700 + }, + { + "epoch": 5.07, + "learning_rate": 3.310960822488643e-05, + "loss": 2.7007, + "step": 197800 + }, + { + "epoch": 5.07, + "learning_rate": 3.310106909860983e-05, + "loss": 2.8234, + "step": 197900 + }, + { + "epoch": 5.07, + "learning_rate": 3.309252997233323e-05, + "loss": 2.8521, + "step": 198000 + }, + { + "epoch": 5.07, + "learning_rate": 3.308399084605663e-05, + "loss": 2.7973, + "step": 198100 + }, + { + "epoch": 5.08, + "learning_rate": 3.3075451719780035e-05, + "loss": 2.8427, + "step": 198200 + }, + { + "epoch": 5.08, + "learning_rate": 3.3066912593503436e-05, + "loss": 2.7293, + "step": 198300 + }, + { + "epoch": 5.08, + "learning_rate": 3.305837346722683e-05, + "loss": 2.7858, + "step": 198400 + }, + { + "epoch": 5.09, + "learning_rate": 3.304983434095024e-05, + "loss": 2.7012, + "step": 198500 + }, + { + "epoch": 5.09, + "learning_rate": 3.3041295214673634e-05, + "loss": 2.5829, + "step": 198600 + }, + { + "epoch": 5.09, + "learning_rate": 3.3032756088397036e-05, + "loss": 2.7131, + "step": 198700 + }, + { + "epoch": 5.09, + "learning_rate": 3.302421696212044e-05, + "loss": 2.8127, + "step": 198800 + }, + { + "epoch": 5.1, + "learning_rate": 3.301567783584384e-05, + "loss": 2.8022, + "step": 198900 + }, + { + "epoch": 5.1, + "learning_rate": 3.300713870956724e-05, + "loss": 2.5661, + "step": 199000 + }, + { + "epoch": 5.1, + "learning_rate": 3.299859958329064e-05, + "loss": 2.697, + "step": 199100 + }, + { + "epoch": 5.1, + "learning_rate": 3.2990060457014037e-05, + "loss": 2.8766, + "step": 199200 + }, + { + "epoch": 5.11, + "learning_rate": 3.2981521330737445e-05, + "loss": 2.7826, + "step": 199300 + }, + { + "epoch": 5.11, + "learning_rate": 3.297298220446084e-05, + "loss": 2.7409, + "step": 199400 + }, + { + "epoch": 5.11, + "learning_rate": 3.296444307818424e-05, + "loss": 2.773, + "step": 199500 + }, + { + "epoch": 5.11, + "learning_rate": 3.295590395190764e-05, + "loss": 2.9256, + "step": 199600 + }, + { + "epoch": 5.12, + "learning_rate": 3.2947364825631044e-05, + "loss": 2.817, + "step": 199700 + }, + { + "epoch": 5.12, + "learning_rate": 3.293882569935444e-05, + "loss": 2.7419, + "step": 199800 + }, + { + "epoch": 5.12, + "learning_rate": 3.293028657307785e-05, + "loss": 2.7595, + "step": 199900 + }, + { + "epoch": 5.12, + "learning_rate": 3.292174744680124e-05, + "loss": 2.7349, + "step": 200000 + }, + { + "epoch": 5.13, + "learning_rate": 3.2913208320524644e-05, + "loss": 2.6855, + "step": 200100 + }, + { + "epoch": 5.13, + "learning_rate": 3.2904669194248045e-05, + "loss": 2.8252, + "step": 200200 + }, + { + "epoch": 5.13, + "learning_rate": 3.289613006797145e-05, + "loss": 2.8531, + "step": 200300 + }, + { + "epoch": 5.13, + "learning_rate": 3.288759094169485e-05, + "loss": 2.7198, + "step": 200400 + }, + { + "epoch": 5.14, + "learning_rate": 3.287905181541825e-05, + "loss": 2.8435, + "step": 200500 + }, + { + "epoch": 5.14, + "learning_rate": 3.2870512689141645e-05, + "loss": 2.7236, + "step": 200600 + }, + { + "epoch": 5.14, + "learning_rate": 3.286197356286505e-05, + "loss": 2.7964, + "step": 200700 + }, + { + "epoch": 5.14, + "learning_rate": 3.285343443658845e-05, + "loss": 2.8058, + "step": 200800 + }, + { + "epoch": 5.15, + "learning_rate": 3.284489531031185e-05, + "loss": 2.6538, + "step": 200900 + }, + { + "epoch": 5.15, + "learning_rate": 3.283635618403525e-05, + "loss": 2.8613, + "step": 201000 + }, + { + "epoch": 5.15, + "learning_rate": 3.282781705775865e-05, + "loss": 2.8554, + "step": 201100 + }, + { + "epoch": 5.15, + "learning_rate": 3.2819277931482054e-05, + "loss": 2.6762, + "step": 201200 + }, + { + "epoch": 5.16, + "learning_rate": 3.2810738805205455e-05, + "loss": 2.772, + "step": 201300 + }, + { + "epoch": 5.16, + "learning_rate": 3.280219967892885e-05, + "loss": 2.5641, + "step": 201400 + }, + { + "epoch": 5.16, + "learning_rate": 3.279366055265226e-05, + "loss": 2.8266, + "step": 201500 + }, + { + "epoch": 5.16, + "learning_rate": 3.278512142637565e-05, + "loss": 3.0291, + "step": 201600 + }, + { + "epoch": 5.17, + "learning_rate": 3.2776582300099055e-05, + "loss": 2.648, + "step": 201700 + }, + { + "epoch": 5.17, + "learning_rate": 3.2768043173822456e-05, + "loss": 2.8076, + "step": 201800 + }, + { + "epoch": 5.17, + "learning_rate": 3.275950404754586e-05, + "loss": 2.7914, + "step": 201900 + }, + { + "epoch": 5.17, + "learning_rate": 3.275096492126925e-05, + "loss": 2.8401, + "step": 202000 + }, + { + "epoch": 5.18, + "learning_rate": 3.274242579499266e-05, + "loss": 2.6394, + "step": 202100 + }, + { + "epoch": 5.18, + "learning_rate": 3.2733886668716056e-05, + "loss": 2.8558, + "step": 202200 + }, + { + "epoch": 5.18, + "learning_rate": 3.272534754243946e-05, + "loss": 2.7711, + "step": 202300 + }, + { + "epoch": 5.18, + "learning_rate": 3.271680841616286e-05, + "loss": 2.9304, + "step": 202400 + }, + { + "epoch": 5.19, + "learning_rate": 3.270826928988626e-05, + "loss": 2.6178, + "step": 202500 + }, + { + "epoch": 5.19, + "learning_rate": 3.269973016360966e-05, + "loss": 2.8751, + "step": 202600 + }, + { + "epoch": 5.19, + "learning_rate": 3.2691191037333064e-05, + "loss": 2.7741, + "step": 202700 + }, + { + "epoch": 5.2, + "learning_rate": 3.268265191105646e-05, + "loss": 2.7994, + "step": 202800 + }, + { + "epoch": 5.2, + "learning_rate": 3.267411278477987e-05, + "loss": 2.7062, + "step": 202900 + }, + { + "epoch": 5.2, + "learning_rate": 3.266557365850326e-05, + "loss": 2.804, + "step": 203000 + }, + { + "epoch": 5.2, + "learning_rate": 3.265703453222666e-05, + "loss": 2.6745, + "step": 203100 + }, + { + "epoch": 5.21, + "learning_rate": 3.2648495405950065e-05, + "loss": 2.8282, + "step": 203200 + }, + { + "epoch": 5.21, + "learning_rate": 3.2639956279673466e-05, + "loss": 2.6831, + "step": 203300 + }, + { + "epoch": 5.21, + "learning_rate": 3.263141715339687e-05, + "loss": 2.7802, + "step": 203400 + }, + { + "epoch": 5.21, + "learning_rate": 3.262287802712027e-05, + "loss": 2.7884, + "step": 203500 + }, + { + "epoch": 5.22, + "learning_rate": 3.2614338900843664e-05, + "loss": 2.6893, + "step": 203600 + }, + { + "epoch": 5.22, + "learning_rate": 3.260579977456707e-05, + "loss": 2.8855, + "step": 203700 + }, + { + "epoch": 5.22, + "learning_rate": 3.259726064829047e-05, + "loss": 2.6815, + "step": 203800 + }, + { + "epoch": 5.22, + "learning_rate": 3.258872152201387e-05, + "loss": 2.7475, + "step": 203900 + }, + { + "epoch": 5.23, + "learning_rate": 3.258018239573727e-05, + "loss": 2.7349, + "step": 204000 + }, + { + "epoch": 5.23, + "learning_rate": 3.257164326946067e-05, + "loss": 2.6756, + "step": 204100 + }, + { + "epoch": 5.23, + "learning_rate": 3.256310414318407e-05, + "loss": 2.6423, + "step": 204200 + }, + { + "epoch": 5.23, + "learning_rate": 3.2554565016907475e-05, + "loss": 2.9113, + "step": 204300 + }, + { + "epoch": 5.24, + "learning_rate": 3.254602589063087e-05, + "loss": 2.797, + "step": 204400 + }, + { + "epoch": 5.24, + "learning_rate": 3.253748676435428e-05, + "loss": 2.7763, + "step": 204500 + }, + { + "epoch": 5.24, + "learning_rate": 3.252894763807767e-05, + "loss": 2.8551, + "step": 204600 + }, + { + "epoch": 5.24, + "learning_rate": 3.2520408511801074e-05, + "loss": 2.7685, + "step": 204700 + }, + { + "epoch": 5.25, + "learning_rate": 3.2511869385524476e-05, + "loss": 2.6483, + "step": 204800 + }, + { + "epoch": 5.25, + "learning_rate": 3.250333025924788e-05, + "loss": 2.7825, + "step": 204900 + }, + { + "epoch": 5.25, + "learning_rate": 3.249479113297127e-05, + "loss": 2.5934, + "step": 205000 + }, + { + "epoch": 5.25, + "learning_rate": 3.248625200669468e-05, + "loss": 2.7495, + "step": 205100 + }, + { + "epoch": 5.26, + "learning_rate": 3.2477712880418075e-05, + "loss": 2.629, + "step": 205200 + }, + { + "epoch": 5.26, + "learning_rate": 3.246917375414148e-05, + "loss": 2.8097, + "step": 205300 + }, + { + "epoch": 5.26, + "learning_rate": 3.246063462786488e-05, + "loss": 2.8791, + "step": 205400 + }, + { + "epoch": 5.26, + "learning_rate": 3.245209550158828e-05, + "loss": 2.8065, + "step": 205500 + }, + { + "epoch": 5.27, + "learning_rate": 3.244355637531168e-05, + "loss": 2.5965, + "step": 205600 + }, + { + "epoch": 5.27, + "learning_rate": 3.243501724903508e-05, + "loss": 2.8874, + "step": 205700 + }, + { + "epoch": 5.27, + "learning_rate": 3.242647812275848e-05, + "loss": 2.5646, + "step": 205800 + }, + { + "epoch": 5.27, + "learning_rate": 3.2417938996481886e-05, + "loss": 2.8783, + "step": 205900 + }, + { + "epoch": 5.28, + "learning_rate": 3.240939987020528e-05, + "loss": 2.8532, + "step": 206000 + }, + { + "epoch": 5.28, + "learning_rate": 3.240086074392868e-05, + "loss": 2.7758, + "step": 206100 + }, + { + "epoch": 5.28, + "learning_rate": 3.2392321617652084e-05, + "loss": 2.9672, + "step": 206200 + }, + { + "epoch": 5.28, + "learning_rate": 3.2383782491375485e-05, + "loss": 2.6679, + "step": 206300 + }, + { + "epoch": 5.29, + "learning_rate": 3.237524336509889e-05, + "loss": 2.68, + "step": 206400 + }, + { + "epoch": 5.29, + "learning_rate": 3.236670423882229e-05, + "loss": 2.9546, + "step": 206500 + }, + { + "epoch": 5.29, + "learning_rate": 3.235816511254568e-05, + "loss": 2.665, + "step": 206600 + }, + { + "epoch": 5.3, + "learning_rate": 3.234962598626909e-05, + "loss": 2.6909, + "step": 206700 + }, + { + "epoch": 5.3, + "learning_rate": 3.2341086859992486e-05, + "loss": 2.733, + "step": 206800 + }, + { + "epoch": 5.3, + "learning_rate": 3.233254773371589e-05, + "loss": 2.7725, + "step": 206900 + }, + { + "epoch": 5.3, + "learning_rate": 3.232400860743929e-05, + "loss": 2.975, + "step": 207000 + }, + { + "epoch": 5.31, + "learning_rate": 3.231546948116269e-05, + "loss": 2.8213, + "step": 207100 + }, + { + "epoch": 5.31, + "learning_rate": 3.2306930354886086e-05, + "loss": 2.6112, + "step": 207200 + }, + { + "epoch": 5.31, + "learning_rate": 3.2298391228609494e-05, + "loss": 2.8263, + "step": 207300 + }, + { + "epoch": 5.31, + "learning_rate": 3.228985210233289e-05, + "loss": 2.7241, + "step": 207400 + }, + { + "epoch": 5.32, + "learning_rate": 3.228131297605629e-05, + "loss": 2.7141, + "step": 207500 + }, + { + "epoch": 5.32, + "learning_rate": 3.227277384977969e-05, + "loss": 2.7914, + "step": 207600 + }, + { + "epoch": 5.32, + "learning_rate": 3.2264234723503094e-05, + "loss": 2.7239, + "step": 207700 + }, + { + "epoch": 5.32, + "learning_rate": 3.2255695597226495e-05, + "loss": 2.8226, + "step": 207800 + }, + { + "epoch": 5.33, + "learning_rate": 3.22471564709499e-05, + "loss": 2.7109, + "step": 207900 + }, + { + "epoch": 5.33, + "learning_rate": 3.223861734467329e-05, + "loss": 2.764, + "step": 208000 + }, + { + "epoch": 5.33, + "learning_rate": 3.22300782183967e-05, + "loss": 2.7809, + "step": 208100 + }, + { + "epoch": 5.33, + "learning_rate": 3.2221539092120095e-05, + "loss": 2.7088, + "step": 208200 + }, + { + "epoch": 5.34, + "learning_rate": 3.2212999965843496e-05, + "loss": 2.6485, + "step": 208300 + }, + { + "epoch": 5.34, + "learning_rate": 3.22044608395669e-05, + "loss": 2.7716, + "step": 208400 + }, + { + "epoch": 5.34, + "learning_rate": 3.21959217132903e-05, + "loss": 2.7955, + "step": 208500 + }, + { + "epoch": 5.34, + "learning_rate": 3.21873825870137e-05, + "loss": 2.7065, + "step": 208600 + }, + { + "epoch": 5.35, + "learning_rate": 3.21788434607371e-05, + "loss": 2.7836, + "step": 208700 + }, + { + "epoch": 5.35, + "learning_rate": 3.21703043344605e-05, + "loss": 2.7758, + "step": 208800 + }, + { + "epoch": 5.35, + "learning_rate": 3.2161765208183905e-05, + "loss": 2.7706, + "step": 208900 + }, + { + "epoch": 5.35, + "learning_rate": 3.21532260819073e-05, + "loss": 2.6964, + "step": 209000 + }, + { + "epoch": 5.36, + "learning_rate": 3.21446869556307e-05, + "loss": 2.9809, + "step": 209100 + }, + { + "epoch": 5.36, + "learning_rate": 3.21361478293541e-05, + "loss": 2.6947, + "step": 209200 + }, + { + "epoch": 5.36, + "learning_rate": 3.2127608703077505e-05, + "loss": 2.7077, + "step": 209300 + }, + { + "epoch": 5.36, + "learning_rate": 3.2119069576800906e-05, + "loss": 2.8423, + "step": 209400 + }, + { + "epoch": 5.37, + "learning_rate": 3.211053045052431e-05, + "loss": 2.7861, + "step": 209500 + }, + { + "epoch": 5.37, + "learning_rate": 3.21019913242477e-05, + "loss": 2.81, + "step": 209600 + }, + { + "epoch": 5.37, + "learning_rate": 3.209345219797111e-05, + "loss": 2.8115, + "step": 209700 + }, + { + "epoch": 5.37, + "learning_rate": 3.2084913071694506e-05, + "loss": 2.6927, + "step": 209800 + }, + { + "epoch": 5.38, + "learning_rate": 3.207637394541791e-05, + "loss": 2.8274, + "step": 209900 + }, + { + "epoch": 5.38, + "learning_rate": 3.206783481914131e-05, + "loss": 2.8401, + "step": 210000 + }, + { + "epoch": 5.38, + "learning_rate": 3.205929569286471e-05, + "loss": 2.9757, + "step": 210100 + }, + { + "epoch": 5.38, + "learning_rate": 3.2050756566588105e-05, + "loss": 2.5407, + "step": 210200 + }, + { + "epoch": 5.39, + "learning_rate": 3.2042217440311513e-05, + "loss": 2.6253, + "step": 210300 + }, + { + "epoch": 5.39, + "learning_rate": 3.203367831403491e-05, + "loss": 2.6943, + "step": 210400 + }, + { + "epoch": 5.39, + "learning_rate": 3.202513918775831e-05, + "loss": 2.8777, + "step": 210500 + }, + { + "epoch": 5.4, + "learning_rate": 3.201660006148171e-05, + "loss": 2.6651, + "step": 210600 + }, + { + "epoch": 5.4, + "learning_rate": 3.2008060935205106e-05, + "loss": 2.6112, + "step": 210700 + }, + { + "epoch": 5.4, + "learning_rate": 3.1999521808928514e-05, + "loss": 2.7957, + "step": 210800 + }, + { + "epoch": 5.4, + "learning_rate": 3.199098268265191e-05, + "loss": 2.8197, + "step": 210900 + }, + { + "epoch": 5.41, + "learning_rate": 3.198244355637531e-05, + "loss": 2.8579, + "step": 211000 + }, + { + "epoch": 5.41, + "learning_rate": 3.197390443009871e-05, + "loss": 2.8535, + "step": 211100 + }, + { + "epoch": 5.41, + "learning_rate": 3.1965365303822114e-05, + "loss": 2.813, + "step": 211200 + }, + { + "epoch": 5.41, + "learning_rate": 3.1956826177545515e-05, + "loss": 2.8407, + "step": 211300 + }, + { + "epoch": 5.42, + "learning_rate": 3.194828705126892e-05, + "loss": 2.6361, + "step": 211400 + }, + { + "epoch": 5.42, + "learning_rate": 3.193974792499231e-05, + "loss": 2.9215, + "step": 211500 + }, + { + "epoch": 5.42, + "learning_rate": 3.193120879871572e-05, + "loss": 2.8109, + "step": 211600 + }, + { + "epoch": 5.42, + "learning_rate": 3.1922669672439115e-05, + "loss": 2.7735, + "step": 211700 + }, + { + "epoch": 5.43, + "learning_rate": 3.1914130546162516e-05, + "loss": 2.5616, + "step": 211800 + }, + { + "epoch": 5.43, + "learning_rate": 3.190559141988592e-05, + "loss": 2.8323, + "step": 211900 + }, + { + "epoch": 5.43, + "learning_rate": 3.189705229360932e-05, + "loss": 2.6473, + "step": 212000 + }, + { + "epoch": 5.43, + "learning_rate": 3.1888513167332714e-05, + "loss": 2.6523, + "step": 212100 + }, + { + "epoch": 5.44, + "learning_rate": 3.187997404105612e-05, + "loss": 2.6755, + "step": 212200 + }, + { + "epoch": 5.44, + "learning_rate": 3.187143491477952e-05, + "loss": 2.6132, + "step": 212300 + }, + { + "epoch": 5.44, + "learning_rate": 3.186289578850292e-05, + "loss": 2.573, + "step": 212400 + }, + { + "epoch": 5.44, + "learning_rate": 3.185435666222632e-05, + "loss": 2.8654, + "step": 212500 + }, + { + "epoch": 5.45, + "learning_rate": 3.184581753594972e-05, + "loss": 2.7064, + "step": 212600 + }, + { + "epoch": 5.45, + "learning_rate": 3.1837278409673124e-05, + "loss": 2.9272, + "step": 212700 + }, + { + "epoch": 5.45, + "learning_rate": 3.1828739283396525e-05, + "loss": 2.8052, + "step": 212800 + }, + { + "epoch": 5.45, + "learning_rate": 3.182020015711992e-05, + "loss": 2.6925, + "step": 212900 + }, + { + "epoch": 5.46, + "learning_rate": 3.181166103084333e-05, + "loss": 2.7437, + "step": 213000 + }, + { + "epoch": 5.46, + "learning_rate": 3.180312190456672e-05, + "loss": 2.7572, + "step": 213100 + }, + { + "epoch": 5.46, + "learning_rate": 3.1794582778290125e-05, + "loss": 2.7736, + "step": 213200 + }, + { + "epoch": 5.46, + "learning_rate": 3.1786043652013526e-05, + "loss": 2.6393, + "step": 213300 + }, + { + "epoch": 5.47, + "learning_rate": 3.177750452573693e-05, + "loss": 2.8257, + "step": 213400 + }, + { + "epoch": 5.47, + "learning_rate": 3.176896539946033e-05, + "loss": 2.6962, + "step": 213500 + }, + { + "epoch": 5.47, + "learning_rate": 3.176042627318373e-05, + "loss": 2.7065, + "step": 213600 + }, + { + "epoch": 5.47, + "learning_rate": 3.1751887146907125e-05, + "loss": 2.7483, + "step": 213700 + }, + { + "epoch": 5.48, + "learning_rate": 3.1743348020630534e-05, + "loss": 2.7294, + "step": 213800 + }, + { + "epoch": 5.48, + "learning_rate": 3.173480889435393e-05, + "loss": 2.8052, + "step": 213900 + }, + { + "epoch": 5.48, + "learning_rate": 3.172626976807733e-05, + "loss": 2.643, + "step": 214000 + }, + { + "epoch": 5.48, + "learning_rate": 3.171773064180073e-05, + "loss": 2.8264, + "step": 214100 + }, + { + "epoch": 5.49, + "learning_rate": 3.170919151552413e-05, + "loss": 2.8191, + "step": 214200 + }, + { + "epoch": 5.49, + "learning_rate": 3.1700652389247535e-05, + "loss": 2.7863, + "step": 214300 + }, + { + "epoch": 5.49, + "learning_rate": 3.1692113262970936e-05, + "loss": 2.6596, + "step": 214400 + }, + { + "epoch": 5.49, + "learning_rate": 3.168357413669433e-05, + "loss": 2.6472, + "step": 214500 + }, + { + "epoch": 5.5, + "learning_rate": 3.167503501041774e-05, + "loss": 2.6921, + "step": 214600 + }, + { + "epoch": 5.5, + "learning_rate": 3.1666495884141134e-05, + "loss": 2.5574, + "step": 214700 + }, + { + "epoch": 5.5, + "learning_rate": 3.1657956757864536e-05, + "loss": 2.664, + "step": 214800 + }, + { + "epoch": 5.51, + "learning_rate": 3.164941763158794e-05, + "loss": 2.6997, + "step": 214900 + }, + { + "epoch": 5.51, + "learning_rate": 3.164087850531134e-05, + "loss": 2.64, + "step": 215000 + }, + { + "epoch": 5.51, + "learning_rate": 3.1632339379034734e-05, + "loss": 2.5975, + "step": 215100 + }, + { + "epoch": 5.51, + "learning_rate": 3.162380025275814e-05, + "loss": 2.875, + "step": 215200 + }, + { + "epoch": 5.52, + "learning_rate": 3.161526112648154e-05, + "loss": 2.7729, + "step": 215300 + }, + { + "epoch": 5.52, + "learning_rate": 3.160672200020494e-05, + "loss": 2.5971, + "step": 215400 + }, + { + "epoch": 5.52, + "learning_rate": 3.159818287392834e-05, + "loss": 2.9298, + "step": 215500 + }, + { + "epoch": 5.52, + "learning_rate": 3.158964374765174e-05, + "loss": 2.5974, + "step": 215600 + }, + { + "epoch": 5.53, + "learning_rate": 3.158110462137514e-05, + "loss": 2.7801, + "step": 215700 + }, + { + "epoch": 5.53, + "learning_rate": 3.1572565495098544e-05, + "loss": 2.6669, + "step": 215800 + }, + { + "epoch": 5.53, + "learning_rate": 3.156402636882194e-05, + "loss": 2.7291, + "step": 215900 + }, + { + "epoch": 5.53, + "learning_rate": 3.155548724254535e-05, + "loss": 2.9889, + "step": 216000 + }, + { + "epoch": 5.54, + "learning_rate": 3.154694811626874e-05, + "loss": 2.783, + "step": 216100 + }, + { + "epoch": 5.54, + "learning_rate": 3.1538408989992144e-05, + "loss": 2.7246, + "step": 216200 + }, + { + "epoch": 5.54, + "learning_rate": 3.1529869863715545e-05, + "loss": 2.6294, + "step": 216300 + }, + { + "epoch": 5.54, + "learning_rate": 3.152133073743895e-05, + "loss": 2.8448, + "step": 216400 + }, + { + "epoch": 5.55, + "learning_rate": 3.151279161116235e-05, + "loss": 2.4837, + "step": 216500 + }, + { + "epoch": 5.55, + "learning_rate": 3.150425248488575e-05, + "loss": 2.8523, + "step": 216600 + }, + { + "epoch": 5.55, + "learning_rate": 3.1495713358609145e-05, + "loss": 2.4857, + "step": 216700 + }, + { + "epoch": 5.55, + "learning_rate": 3.148717423233255e-05, + "loss": 2.7957, + "step": 216800 + }, + { + "epoch": 5.56, + "learning_rate": 3.147863510605595e-05, + "loss": 2.5642, + "step": 216900 + }, + { + "epoch": 5.56, + "learning_rate": 3.147009597977935e-05, + "loss": 2.8443, + "step": 217000 + }, + { + "epoch": 5.56, + "learning_rate": 3.146155685350275e-05, + "loss": 2.8137, + "step": 217100 + }, + { + "epoch": 5.56, + "learning_rate": 3.145301772722615e-05, + "loss": 2.6857, + "step": 217200 + }, + { + "epoch": 5.57, + "learning_rate": 3.144447860094955e-05, + "loss": 2.519, + "step": 217300 + }, + { + "epoch": 5.57, + "learning_rate": 3.1435939474672956e-05, + "loss": 2.5561, + "step": 217400 + }, + { + "epoch": 5.57, + "learning_rate": 3.142740034839635e-05, + "loss": 2.6158, + "step": 217500 + }, + { + "epoch": 5.57, + "learning_rate": 3.141886122211975e-05, + "loss": 2.7535, + "step": 217600 + }, + { + "epoch": 5.58, + "learning_rate": 3.1410322095843154e-05, + "loss": 2.5762, + "step": 217700 + }, + { + "epoch": 5.58, + "learning_rate": 3.1401782969566555e-05, + "loss": 2.7406, + "step": 217800 + }, + { + "epoch": 5.58, + "learning_rate": 3.139324384328996e-05, + "loss": 2.6391, + "step": 217900 + }, + { + "epoch": 5.58, + "learning_rate": 3.138470471701336e-05, + "loss": 2.6371, + "step": 218000 + }, + { + "epoch": 5.59, + "learning_rate": 3.137616559073675e-05, + "loss": 2.5723, + "step": 218100 + }, + { + "epoch": 5.59, + "learning_rate": 3.136762646446016e-05, + "loss": 2.6901, + "step": 218200 + }, + { + "epoch": 5.59, + "learning_rate": 3.1359087338183556e-05, + "loss": 2.4828, + "step": 218300 + }, + { + "epoch": 5.59, + "learning_rate": 3.135054821190696e-05, + "loss": 2.7638, + "step": 218400 + }, + { + "epoch": 5.6, + "learning_rate": 3.134200908563036e-05, + "loss": 2.7363, + "step": 218500 + }, + { + "epoch": 5.6, + "learning_rate": 3.133346995935376e-05, + "loss": 2.6146, + "step": 218600 + }, + { + "epoch": 5.6, + "learning_rate": 3.132493083307716e-05, + "loss": 2.7168, + "step": 218700 + }, + { + "epoch": 5.61, + "learning_rate": 3.1316391706800564e-05, + "loss": 2.7791, + "step": 218800 + }, + { + "epoch": 5.61, + "learning_rate": 3.130785258052396e-05, + "loss": 2.8559, + "step": 218900 + }, + { + "epoch": 5.61, + "learning_rate": 3.129931345424737e-05, + "loss": 2.6361, + "step": 219000 + }, + { + "epoch": 5.61, + "learning_rate": 3.129077432797076e-05, + "loss": 2.7913, + "step": 219100 + }, + { + "epoch": 5.62, + "learning_rate": 3.128223520169416e-05, + "loss": 2.7234, + "step": 219200 + }, + { + "epoch": 5.62, + "learning_rate": 3.1273696075417565e-05, + "loss": 2.9367, + "step": 219300 + }, + { + "epoch": 5.62, + "learning_rate": 3.1265156949140966e-05, + "loss": 2.8618, + "step": 219400 + }, + { + "epoch": 5.62, + "learning_rate": 3.125661782286437e-05, + "loss": 2.7544, + "step": 219500 + }, + { + "epoch": 5.63, + "learning_rate": 3.124807869658777e-05, + "loss": 2.6748, + "step": 219600 + }, + { + "epoch": 5.63, + "learning_rate": 3.1239539570311164e-05, + "loss": 2.7111, + "step": 219700 + }, + { + "epoch": 5.63, + "learning_rate": 3.123100044403457e-05, + "loss": 2.7402, + "step": 219800 + }, + { + "epoch": 5.63, + "learning_rate": 3.122246131775797e-05, + "loss": 2.6942, + "step": 219900 + }, + { + "epoch": 5.64, + "learning_rate": 3.121392219148137e-05, + "loss": 2.7211, + "step": 220000 + }, + { + "epoch": 5.64, + "learning_rate": 3.120538306520477e-05, + "loss": 2.649, + "step": 220100 + }, + { + "epoch": 5.64, + "learning_rate": 3.119684393892817e-05, + "loss": 2.8113, + "step": 220200 + }, + { + "epoch": 5.64, + "learning_rate": 3.118830481265157e-05, + "loss": 2.6578, + "step": 220300 + }, + { + "epoch": 5.65, + "learning_rate": 3.1179765686374975e-05, + "loss": 2.8545, + "step": 220400 + }, + { + "epoch": 5.65, + "learning_rate": 3.117122656009837e-05, + "loss": 2.9633, + "step": 220500 + }, + { + "epoch": 5.65, + "learning_rate": 3.116268743382177e-05, + "loss": 2.8168, + "step": 220600 + }, + { + "epoch": 5.65, + "learning_rate": 3.115414830754517e-05, + "loss": 2.6067, + "step": 220700 + }, + { + "epoch": 5.66, + "learning_rate": 3.1145609181268574e-05, + "loss": 2.6563, + "step": 220800 + }, + { + "epoch": 5.66, + "learning_rate": 3.1137070054991976e-05, + "loss": 2.6782, + "step": 220900 + }, + { + "epoch": 5.66, + "learning_rate": 3.112853092871538e-05, + "loss": 2.5729, + "step": 221000 + }, + { + "epoch": 5.66, + "learning_rate": 3.111999180243877e-05, + "loss": 2.6424, + "step": 221100 + }, + { + "epoch": 5.67, + "learning_rate": 3.111145267616218e-05, + "loss": 2.6948, + "step": 221200 + }, + { + "epoch": 5.67, + "learning_rate": 3.1102913549885575e-05, + "loss": 2.6781, + "step": 221300 + }, + { + "epoch": 5.67, + "learning_rate": 3.109437442360898e-05, + "loss": 2.6164, + "step": 221400 + }, + { + "epoch": 5.67, + "learning_rate": 3.108583529733238e-05, + "loss": 2.7599, + "step": 221500 + }, + { + "epoch": 5.68, + "learning_rate": 3.107729617105578e-05, + "loss": 2.6885, + "step": 221600 + }, + { + "epoch": 5.68, + "learning_rate": 3.106875704477918e-05, + "loss": 2.7714, + "step": 221700 + }, + { + "epoch": 5.68, + "learning_rate": 3.106021791850258e-05, + "loss": 2.8554, + "step": 221800 + }, + { + "epoch": 5.68, + "learning_rate": 3.105167879222598e-05, + "loss": 2.8403, + "step": 221900 + }, + { + "epoch": 5.69, + "learning_rate": 3.1043139665949386e-05, + "loss": 2.7891, + "step": 222000 + }, + { + "epoch": 5.69, + "learning_rate": 3.103460053967278e-05, + "loss": 2.7963, + "step": 222100 + }, + { + "epoch": 5.69, + "learning_rate": 3.102606141339618e-05, + "loss": 2.8312, + "step": 222200 + }, + { + "epoch": 5.69, + "learning_rate": 3.1017522287119584e-05, + "loss": 2.5519, + "step": 222300 + }, + { + "epoch": 5.7, + "learning_rate": 3.1008983160842986e-05, + "loss": 2.7113, + "step": 222400 + }, + { + "epoch": 5.7, + "learning_rate": 3.100044403456639e-05, + "loss": 2.6592, + "step": 222500 + }, + { + "epoch": 5.7, + "learning_rate": 3.099190490828979e-05, + "loss": 2.7181, + "step": 222600 + }, + { + "epoch": 5.7, + "learning_rate": 3.0983365782013184e-05, + "loss": 2.6204, + "step": 222700 + }, + { + "epoch": 5.71, + "learning_rate": 3.097482665573659e-05, + "loss": 2.6717, + "step": 222800 + }, + { + "epoch": 5.71, + "learning_rate": 3.0966287529459987e-05, + "loss": 2.8013, + "step": 222900 + }, + { + "epoch": 5.71, + "learning_rate": 3.095774840318339e-05, + "loss": 2.7697, + "step": 223000 + }, + { + "epoch": 5.72, + "learning_rate": 3.094920927690679e-05, + "loss": 2.6964, + "step": 223100 + }, + { + "epoch": 5.72, + "learning_rate": 3.094067015063019e-05, + "loss": 2.7323, + "step": 223200 + }, + { + "epoch": 5.72, + "learning_rate": 3.0932131024353586e-05, + "loss": 2.6465, + "step": 223300 + }, + { + "epoch": 5.72, + "learning_rate": 3.0923591898076994e-05, + "loss": 2.6533, + "step": 223400 + }, + { + "epoch": 5.73, + "learning_rate": 3.091505277180039e-05, + "loss": 2.5259, + "step": 223500 + }, + { + "epoch": 5.73, + "learning_rate": 3.090651364552379e-05, + "loss": 2.943, + "step": 223600 + }, + { + "epoch": 5.73, + "learning_rate": 3.089797451924719e-05, + "loss": 2.6762, + "step": 223700 + }, + { + "epoch": 5.73, + "learning_rate": 3.0889435392970594e-05, + "loss": 2.8889, + "step": 223800 + }, + { + "epoch": 5.74, + "learning_rate": 3.0880896266693995e-05, + "loss": 2.907, + "step": 223900 + }, + { + "epoch": 5.74, + "learning_rate": 3.08723571404174e-05, + "loss": 2.769, + "step": 224000 + }, + { + "epoch": 5.74, + "learning_rate": 3.086381801414079e-05, + "loss": 2.8173, + "step": 224100 + }, + { + "epoch": 5.74, + "learning_rate": 3.08552788878642e-05, + "loss": 2.6643, + "step": 224200 + }, + { + "epoch": 5.75, + "learning_rate": 3.0846739761587595e-05, + "loss": 2.855, + "step": 224300 + }, + { + "epoch": 5.75, + "learning_rate": 3.0838200635310996e-05, + "loss": 2.836, + "step": 224400 + }, + { + "epoch": 5.75, + "learning_rate": 3.08296615090344e-05, + "loss": 2.703, + "step": 224500 + }, + { + "epoch": 5.75, + "learning_rate": 3.08211223827578e-05, + "loss": 2.8927, + "step": 224600 + }, + { + "epoch": 5.76, + "learning_rate": 3.08125832564812e-05, + "loss": 2.6234, + "step": 224700 + }, + { + "epoch": 5.76, + "learning_rate": 3.08040441302046e-05, + "loss": 2.715, + "step": 224800 + }, + { + "epoch": 5.76, + "learning_rate": 3.0795505003928e-05, + "loss": 2.6856, + "step": 224900 + }, + { + "epoch": 5.76, + "learning_rate": 3.0786965877651406e-05, + "loss": 2.6405, + "step": 225000 + }, + { + "epoch": 5.77, + "learning_rate": 3.07784267513748e-05, + "loss": 2.6382, + "step": 225100 + }, + { + "epoch": 5.77, + "learning_rate": 3.07698876250982e-05, + "loss": 2.6424, + "step": 225200 + }, + { + "epoch": 5.77, + "learning_rate": 3.0761348498821603e-05, + "loss": 2.6423, + "step": 225300 + }, + { + "epoch": 5.77, + "learning_rate": 3.0752809372545005e-05, + "loss": 2.7959, + "step": 225400 + }, + { + "epoch": 5.78, + "learning_rate": 3.07442702462684e-05, + "loss": 2.9217, + "step": 225500 + }, + { + "epoch": 5.78, + "learning_rate": 3.073573111999181e-05, + "loss": 2.8175, + "step": 225600 + }, + { + "epoch": 5.78, + "learning_rate": 3.07271919937152e-05, + "loss": 2.6331, + "step": 225700 + }, + { + "epoch": 5.78, + "learning_rate": 3.0718652867438604e-05, + "loss": 2.7062, + "step": 225800 + }, + { + "epoch": 5.79, + "learning_rate": 3.0710113741162006e-05, + "loss": 2.6827, + "step": 225900 + }, + { + "epoch": 5.79, + "learning_rate": 3.070157461488541e-05, + "loss": 2.6571, + "step": 226000 + }, + { + "epoch": 5.79, + "learning_rate": 3.069303548860881e-05, + "loss": 2.5765, + "step": 226100 + }, + { + "epoch": 5.79, + "learning_rate": 3.068449636233221e-05, + "loss": 2.7972, + "step": 226200 + }, + { + "epoch": 5.8, + "learning_rate": 3.0675957236055605e-05, + "loss": 2.7939, + "step": 226300 + }, + { + "epoch": 5.8, + "learning_rate": 3.0667418109779014e-05, + "loss": 2.6674, + "step": 226400 + }, + { + "epoch": 5.8, + "learning_rate": 3.065887898350241e-05, + "loss": 2.6047, + "step": 226500 + }, + { + "epoch": 5.8, + "learning_rate": 3.065033985722581e-05, + "loss": 2.5639, + "step": 226600 + }, + { + "epoch": 5.81, + "learning_rate": 3.064180073094921e-05, + "loss": 2.7367, + "step": 226700 + }, + { + "epoch": 5.81, + "learning_rate": 3.063326160467261e-05, + "loss": 2.879, + "step": 226800 + }, + { + "epoch": 5.81, + "learning_rate": 3.0624722478396015e-05, + "loss": 2.8679, + "step": 226900 + }, + { + "epoch": 5.82, + "learning_rate": 3.0616183352119416e-05, + "loss": 2.6516, + "step": 227000 + }, + { + "epoch": 5.82, + "learning_rate": 3.060764422584281e-05, + "loss": 2.727, + "step": 227100 + }, + { + "epoch": 5.82, + "learning_rate": 3.059910509956622e-05, + "loss": 2.7692, + "step": 227200 + }, + { + "epoch": 5.82, + "learning_rate": 3.0590565973289614e-05, + "loss": 2.6338, + "step": 227300 + }, + { + "epoch": 5.83, + "learning_rate": 3.0582026847013016e-05, + "loss": 2.6727, + "step": 227400 + }, + { + "epoch": 5.83, + "learning_rate": 3.057348772073642e-05, + "loss": 2.5125, + "step": 227500 + }, + { + "epoch": 5.83, + "learning_rate": 3.056494859445982e-05, + "loss": 2.8578, + "step": 227600 + }, + { + "epoch": 5.83, + "learning_rate": 3.055640946818322e-05, + "loss": 2.792, + "step": 227700 + }, + { + "epoch": 5.84, + "learning_rate": 3.054787034190662e-05, + "loss": 2.6018, + "step": 227800 + }, + { + "epoch": 5.84, + "learning_rate": 3.0539331215630017e-05, + "loss": 2.7506, + "step": 227900 + }, + { + "epoch": 5.84, + "learning_rate": 3.053079208935342e-05, + "loss": 2.937, + "step": 228000 + }, + { + "epoch": 5.84, + "learning_rate": 3.052225296307682e-05, + "loss": 2.4773, + "step": 228100 + }, + { + "epoch": 5.85, + "learning_rate": 3.0513713836800218e-05, + "loss": 2.745, + "step": 228200 + }, + { + "epoch": 5.85, + "learning_rate": 3.050517471052362e-05, + "loss": 2.6591, + "step": 228300 + }, + { + "epoch": 5.85, + "learning_rate": 3.0496635584247018e-05, + "loss": 2.8307, + "step": 228400 + }, + { + "epoch": 5.85, + "learning_rate": 3.0488096457970422e-05, + "loss": 2.732, + "step": 228500 + }, + { + "epoch": 5.86, + "learning_rate": 3.047955733169382e-05, + "loss": 2.703, + "step": 228600 + }, + { + "epoch": 5.86, + "learning_rate": 3.0471018205417222e-05, + "loss": 2.7368, + "step": 228700 + }, + { + "epoch": 5.86, + "learning_rate": 3.046247907914062e-05, + "loss": 2.7134, + "step": 228800 + }, + { + "epoch": 5.86, + "learning_rate": 3.0453939952864025e-05, + "loss": 2.717, + "step": 228900 + }, + { + "epoch": 5.87, + "learning_rate": 3.0445400826587423e-05, + "loss": 2.6813, + "step": 229000 + }, + { + "epoch": 5.87, + "learning_rate": 3.0436861700310825e-05, + "loss": 2.7051, + "step": 229100 + }, + { + "epoch": 5.87, + "learning_rate": 3.0428322574034223e-05, + "loss": 2.4863, + "step": 229200 + }, + { + "epoch": 5.87, + "learning_rate": 3.0419783447757628e-05, + "loss": 2.8049, + "step": 229300 + }, + { + "epoch": 5.88, + "learning_rate": 3.0411244321481026e-05, + "loss": 2.7755, + "step": 229400 + }, + { + "epoch": 5.88, + "learning_rate": 3.0402705195204428e-05, + "loss": 2.9066, + "step": 229500 + }, + { + "epoch": 5.88, + "learning_rate": 3.0394166068927826e-05, + "loss": 3.0177, + "step": 229600 + }, + { + "epoch": 5.88, + "learning_rate": 3.038562694265123e-05, + "loss": 2.6426, + "step": 229700 + }, + { + "epoch": 5.89, + "learning_rate": 3.037708781637463e-05, + "loss": 2.7463, + "step": 229800 + }, + { + "epoch": 5.89, + "learning_rate": 3.036854869009803e-05, + "loss": 2.8526, + "step": 229900 + }, + { + "epoch": 5.89, + "learning_rate": 3.036000956382143e-05, + "loss": 2.9398, + "step": 230000 + }, + { + "epoch": 5.89, + "learning_rate": 3.0351470437544834e-05, + "loss": 2.5734, + "step": 230100 + }, + { + "epoch": 5.9, + "learning_rate": 3.034293131126823e-05, + "loss": 2.7124, + "step": 230200 + }, + { + "epoch": 5.9, + "learning_rate": 3.0334392184991633e-05, + "loss": 2.8195, + "step": 230300 + }, + { + "epoch": 5.9, + "learning_rate": 3.032585305871503e-05, + "loss": 2.7899, + "step": 230400 + }, + { + "epoch": 5.9, + "learning_rate": 3.0317313932438433e-05, + "loss": 2.7026, + "step": 230500 + }, + { + "epoch": 5.91, + "learning_rate": 3.030877480616183e-05, + "loss": 2.6566, + "step": 230600 + }, + { + "epoch": 5.91, + "learning_rate": 3.0300235679885236e-05, + "loss": 2.6287, + "step": 230700 + }, + { + "epoch": 5.91, + "learning_rate": 3.0291696553608634e-05, + "loss": 2.8339, + "step": 230800 + }, + { + "epoch": 5.92, + "learning_rate": 3.0283157427332036e-05, + "loss": 2.8427, + "step": 230900 + }, + { + "epoch": 5.92, + "learning_rate": 3.0274618301055434e-05, + "loss": 2.664, + "step": 231000 + }, + { + "epoch": 5.92, + "learning_rate": 3.026607917477884e-05, + "loss": 2.6706, + "step": 231100 + }, + { + "epoch": 5.92, + "learning_rate": 3.0257540048502237e-05, + "loss": 2.7868, + "step": 231200 + }, + { + "epoch": 5.93, + "learning_rate": 3.024900092222564e-05, + "loss": 2.9513, + "step": 231300 + }, + { + "epoch": 5.93, + "learning_rate": 3.0240461795949037e-05, + "loss": 2.7365, + "step": 231400 + }, + { + "epoch": 5.93, + "learning_rate": 3.0231922669672442e-05, + "loss": 2.5798, + "step": 231500 + }, + { + "epoch": 5.93, + "learning_rate": 3.022338354339584e-05, + "loss": 2.4588, + "step": 231600 + }, + { + "epoch": 5.94, + "learning_rate": 3.021484441711924e-05, + "loss": 2.84, + "step": 231700 + }, + { + "epoch": 5.94, + "learning_rate": 3.020630529084264e-05, + "loss": 2.7311, + "step": 231800 + }, + { + "epoch": 5.94, + "learning_rate": 3.0197766164566045e-05, + "loss": 2.6963, + "step": 231900 + }, + { + "epoch": 5.94, + "learning_rate": 3.0189227038289443e-05, + "loss": 2.7015, + "step": 232000 + }, + { + "epoch": 5.95, + "learning_rate": 3.0180687912012844e-05, + "loss": 2.8284, + "step": 232100 + }, + { + "epoch": 5.95, + "learning_rate": 3.0172148785736242e-05, + "loss": 2.616, + "step": 232200 + }, + { + "epoch": 5.95, + "learning_rate": 3.0163609659459647e-05, + "loss": 2.6611, + "step": 232300 + }, + { + "epoch": 5.95, + "learning_rate": 3.0155070533183046e-05, + "loss": 2.8142, + "step": 232400 + }, + { + "epoch": 5.96, + "learning_rate": 3.0146531406906447e-05, + "loss": 2.6198, + "step": 232500 + }, + { + "epoch": 5.96, + "learning_rate": 3.0137992280629845e-05, + "loss": 2.7892, + "step": 232600 + }, + { + "epoch": 5.96, + "learning_rate": 3.012945315435325e-05, + "loss": 2.7212, + "step": 232700 + }, + { + "epoch": 5.96, + "learning_rate": 3.0120914028076645e-05, + "loss": 2.6871, + "step": 232800 + }, + { + "epoch": 5.97, + "learning_rate": 3.011237490180005e-05, + "loss": 2.6104, + "step": 232900 + }, + { + "epoch": 5.97, + "learning_rate": 3.0103835775523448e-05, + "loss": 2.5973, + "step": 233000 + }, + { + "epoch": 5.97, + "learning_rate": 3.009529664924685e-05, + "loss": 2.7518, + "step": 233100 + }, + { + "epoch": 5.97, + "learning_rate": 3.0086757522970248e-05, + "loss": 2.6271, + "step": 233200 + }, + { + "epoch": 5.98, + "learning_rate": 3.0078218396693653e-05, + "loss": 2.7272, + "step": 233300 + }, + { + "epoch": 5.98, + "learning_rate": 3.006967927041705e-05, + "loss": 2.5538, + "step": 233400 + }, + { + "epoch": 5.98, + "learning_rate": 3.0061140144140452e-05, + "loss": 2.7496, + "step": 233500 + }, + { + "epoch": 5.98, + "learning_rate": 3.005260101786385e-05, + "loss": 2.7422, + "step": 233600 + }, + { + "epoch": 5.99, + "learning_rate": 3.0044061891587256e-05, + "loss": 2.6432, + "step": 233700 + }, + { + "epoch": 5.99, + "learning_rate": 3.0035522765310654e-05, + "loss": 2.6889, + "step": 233800 + }, + { + "epoch": 5.99, + "learning_rate": 3.0026983639034055e-05, + "loss": 2.7468, + "step": 233900 + }, + { + "epoch": 5.99, + "learning_rate": 3.0018444512757453e-05, + "loss": 2.834, + "step": 234000 + }, + { + "epoch": 6.0, + "learning_rate": 3.000990538648086e-05, + "loss": 2.7115, + "step": 234100 + }, + { + "epoch": 6.0, + "learning_rate": 3.0001366260204257e-05, + "loss": 2.5906, + "step": 234200 + }, + { + "epoch": 6.0, + "learning_rate": 2.9992827133927658e-05, + "loss": 2.6469, + "step": 234300 + }, + { + "epoch": 6.0, + "learning_rate": 2.9984288007651056e-05, + "loss": 2.7775, + "step": 234400 + }, + { + "epoch": 6.01, + "learning_rate": 2.997574888137446e-05, + "loss": 2.7041, + "step": 234500 + }, + { + "epoch": 6.01, + "learning_rate": 2.996720975509786e-05, + "loss": 2.7634, + "step": 234600 + }, + { + "epoch": 6.01, + "learning_rate": 2.995867062882126e-05, + "loss": 2.4696, + "step": 234700 + }, + { + "epoch": 6.01, + "learning_rate": 2.995013150254466e-05, + "loss": 2.7226, + "step": 234800 + }, + { + "epoch": 6.02, + "learning_rate": 2.9941592376268064e-05, + "loss": 2.67, + "step": 234900 + }, + { + "epoch": 6.02, + "learning_rate": 2.9933053249991462e-05, + "loss": 2.7104, + "step": 235000 + }, + { + "epoch": 6.02, + "learning_rate": 2.9924514123714864e-05, + "loss": 2.8479, + "step": 235100 + }, + { + "epoch": 6.03, + "learning_rate": 2.9915974997438262e-05, + "loss": 2.7838, + "step": 235200 + }, + { + "epoch": 6.03, + "learning_rate": 2.9907435871161667e-05, + "loss": 2.6987, + "step": 235300 + }, + { + "epoch": 6.03, + "learning_rate": 2.989889674488506e-05, + "loss": 2.6483, + "step": 235400 + }, + { + "epoch": 6.03, + "learning_rate": 2.9890357618608466e-05, + "loss": 2.5661, + "step": 235500 + }, + { + "epoch": 6.04, + "learning_rate": 2.9881818492331865e-05, + "loss": 2.6686, + "step": 235600 + }, + { + "epoch": 6.04, + "learning_rate": 2.987327936605527e-05, + "loss": 2.688, + "step": 235700 + }, + { + "epoch": 6.04, + "learning_rate": 2.9864740239778664e-05, + "loss": 2.76, + "step": 235800 + }, + { + "epoch": 6.04, + "learning_rate": 2.985620111350207e-05, + "loss": 2.6832, + "step": 235900 + }, + { + "epoch": 6.05, + "learning_rate": 2.9847661987225467e-05, + "loss": 2.6808, + "step": 236000 + }, + { + "epoch": 6.05, + "learning_rate": 2.983912286094887e-05, + "loss": 2.7677, + "step": 236100 + }, + { + "epoch": 6.05, + "learning_rate": 2.9830583734672267e-05, + "loss": 2.6441, + "step": 236200 + }, + { + "epoch": 6.05, + "learning_rate": 2.9822044608395672e-05, + "loss": 2.7636, + "step": 236300 + }, + { + "epoch": 6.06, + "learning_rate": 2.981350548211907e-05, + "loss": 2.7828, + "step": 236400 + }, + { + "epoch": 6.06, + "learning_rate": 2.9804966355842472e-05, + "loss": 2.5797, + "step": 236500 + }, + { + "epoch": 6.06, + "learning_rate": 2.979642722956587e-05, + "loss": 2.7867, + "step": 236600 + }, + { + "epoch": 6.06, + "learning_rate": 2.9787888103289275e-05, + "loss": 2.6842, + "step": 236700 + }, + { + "epoch": 6.07, + "learning_rate": 2.9779348977012673e-05, + "loss": 2.7861, + "step": 236800 + }, + { + "epoch": 6.07, + "learning_rate": 2.9770809850736075e-05, + "loss": 2.6882, + "step": 236900 + }, + { + "epoch": 6.07, + "learning_rate": 2.9762270724459473e-05, + "loss": 2.8817, + "step": 237000 + }, + { + "epoch": 6.07, + "learning_rate": 2.9753731598182878e-05, + "loss": 2.9218, + "step": 237100 + }, + { + "epoch": 6.08, + "learning_rate": 2.9745192471906276e-05, + "loss": 2.7078, + "step": 237200 + }, + { + "epoch": 6.08, + "learning_rate": 2.9736653345629677e-05, + "loss": 2.6177, + "step": 237300 + }, + { + "epoch": 6.08, + "learning_rate": 2.9728114219353076e-05, + "loss": 2.667, + "step": 237400 + }, + { + "epoch": 6.08, + "learning_rate": 2.971957509307648e-05, + "loss": 2.8412, + "step": 237500 + }, + { + "epoch": 6.09, + "learning_rate": 2.971103596679988e-05, + "loss": 2.7651, + "step": 237600 + }, + { + "epoch": 6.09, + "learning_rate": 2.970249684052328e-05, + "loss": 2.5657, + "step": 237700 + }, + { + "epoch": 6.09, + "learning_rate": 2.969395771424668e-05, + "loss": 2.4972, + "step": 237800 + }, + { + "epoch": 6.09, + "learning_rate": 2.9685418587970083e-05, + "loss": 2.6563, + "step": 237900 + }, + { + "epoch": 6.1, + "learning_rate": 2.9676879461693478e-05, + "loss": 2.7086, + "step": 238000 + }, + { + "epoch": 6.1, + "learning_rate": 2.9668340335416883e-05, + "loss": 2.4449, + "step": 238100 + }, + { + "epoch": 6.1, + "learning_rate": 2.965980120914028e-05, + "loss": 2.5057, + "step": 238200 + }, + { + "epoch": 6.1, + "learning_rate": 2.9651262082863686e-05, + "loss": 2.7743, + "step": 238300 + }, + { + "epoch": 6.11, + "learning_rate": 2.964272295658708e-05, + "loss": 2.6181, + "step": 238400 + }, + { + "epoch": 6.11, + "learning_rate": 2.9634183830310486e-05, + "loss": 2.6679, + "step": 238500 + }, + { + "epoch": 6.11, + "learning_rate": 2.9625644704033884e-05, + "loss": 2.908, + "step": 238600 + }, + { + "epoch": 6.11, + "learning_rate": 2.9617105577757286e-05, + "loss": 2.4055, + "step": 238700 + }, + { + "epoch": 6.12, + "learning_rate": 2.9608566451480684e-05, + "loss": 2.7305, + "step": 238800 + }, + { + "epoch": 6.12, + "learning_rate": 2.960002732520409e-05, + "loss": 2.7786, + "step": 238900 + }, + { + "epoch": 6.12, + "learning_rate": 2.9591488198927487e-05, + "loss": 2.7557, + "step": 239000 + }, + { + "epoch": 6.13, + "learning_rate": 2.958294907265089e-05, + "loss": 2.6417, + "step": 239100 + }, + { + "epoch": 6.13, + "learning_rate": 2.9574409946374286e-05, + "loss": 2.7198, + "step": 239200 + }, + { + "epoch": 6.13, + "learning_rate": 2.956587082009769e-05, + "loss": 2.7358, + "step": 239300 + }, + { + "epoch": 6.13, + "learning_rate": 2.955733169382109e-05, + "loss": 2.6815, + "step": 239400 + }, + { + "epoch": 6.14, + "learning_rate": 2.954879256754449e-05, + "loss": 2.6953, + "step": 239500 + }, + { + "epoch": 6.14, + "learning_rate": 2.954025344126789e-05, + "loss": 2.8371, + "step": 239600 + }, + { + "epoch": 6.14, + "learning_rate": 2.9531714314991294e-05, + "loss": 2.8867, + "step": 239700 + }, + { + "epoch": 6.14, + "learning_rate": 2.9523175188714692e-05, + "loss": 2.7678, + "step": 239800 + }, + { + "epoch": 6.15, + "learning_rate": 2.9514636062438094e-05, + "loss": 2.4464, + "step": 239900 + }, + { + "epoch": 6.15, + "learning_rate": 2.9506096936161492e-05, + "loss": 2.6759, + "step": 240000 + }, + { + "epoch": 6.15, + "learning_rate": 2.9497557809884897e-05, + "loss": 2.9032, + "step": 240100 + }, + { + "epoch": 6.15, + "learning_rate": 2.9489018683608295e-05, + "loss": 2.7916, + "step": 240200 + }, + { + "epoch": 6.16, + "learning_rate": 2.9480479557331697e-05, + "loss": 2.7683, + "step": 240300 + }, + { + "epoch": 6.16, + "learning_rate": 2.9471940431055095e-05, + "loss": 2.8258, + "step": 240400 + }, + { + "epoch": 6.16, + "learning_rate": 2.94634013047785e-05, + "loss": 2.5918, + "step": 240500 + }, + { + "epoch": 6.16, + "learning_rate": 2.9454862178501898e-05, + "loss": 2.7946, + "step": 240600 + }, + { + "epoch": 6.17, + "learning_rate": 2.94463230522253e-05, + "loss": 2.818, + "step": 240700 + }, + { + "epoch": 6.17, + "learning_rate": 2.9437783925948698e-05, + "loss": 2.6471, + "step": 240800 + }, + { + "epoch": 6.17, + "learning_rate": 2.9429244799672103e-05, + "loss": 2.6668, + "step": 240900 + }, + { + "epoch": 6.17, + "learning_rate": 2.9420705673395497e-05, + "loss": 2.7814, + "step": 241000 + }, + { + "epoch": 6.18, + "learning_rate": 2.9412166547118902e-05, + "loss": 2.7499, + "step": 241100 + }, + { + "epoch": 6.18, + "learning_rate": 2.94036274208423e-05, + "loss": 2.7778, + "step": 241200 + }, + { + "epoch": 6.18, + "learning_rate": 2.9395088294565702e-05, + "loss": 2.6787, + "step": 241300 + }, + { + "epoch": 6.18, + "learning_rate": 2.93865491682891e-05, + "loss": 2.7438, + "step": 241400 + }, + { + "epoch": 6.19, + "learning_rate": 2.9378010042012505e-05, + "loss": 2.7875, + "step": 241500 + }, + { + "epoch": 6.19, + "learning_rate": 2.9369470915735903e-05, + "loss": 2.6229, + "step": 241600 + }, + { + "epoch": 6.19, + "learning_rate": 2.9360931789459305e-05, + "loss": 2.736, + "step": 241700 + }, + { + "epoch": 6.19, + "learning_rate": 2.9352392663182703e-05, + "loss": 2.6908, + "step": 241800 + }, + { + "epoch": 6.2, + "learning_rate": 2.9343853536906108e-05, + "loss": 2.6863, + "step": 241900 + }, + { + "epoch": 6.2, + "learning_rate": 2.9335314410629506e-05, + "loss": 2.6862, + "step": 242000 + }, + { + "epoch": 6.2, + "learning_rate": 2.9326775284352908e-05, + "loss": 2.7347, + "step": 242100 + }, + { + "epoch": 6.2, + "learning_rate": 2.9318236158076306e-05, + "loss": 2.615, + "step": 242200 + }, + { + "epoch": 6.21, + "learning_rate": 2.930969703179971e-05, + "loss": 2.7367, + "step": 242300 + }, + { + "epoch": 6.21, + "learning_rate": 2.930115790552311e-05, + "loss": 2.7765, + "step": 242400 + }, + { + "epoch": 6.21, + "learning_rate": 2.929261877924651e-05, + "loss": 2.806, + "step": 242500 + }, + { + "epoch": 6.21, + "learning_rate": 2.928407965296991e-05, + "loss": 2.7895, + "step": 242600 + }, + { + "epoch": 6.22, + "learning_rate": 2.9275540526693314e-05, + "loss": 2.7328, + "step": 242700 + }, + { + "epoch": 6.22, + "learning_rate": 2.9267001400416712e-05, + "loss": 2.5228, + "step": 242800 + }, + { + "epoch": 6.22, + "learning_rate": 2.9258462274140113e-05, + "loss": 2.6651, + "step": 242900 + }, + { + "epoch": 6.23, + "learning_rate": 2.924992314786351e-05, + "loss": 2.7362, + "step": 243000 + }, + { + "epoch": 6.23, + "learning_rate": 2.9241384021586916e-05, + "loss": 2.8293, + "step": 243100 + }, + { + "epoch": 6.23, + "learning_rate": 2.9232844895310315e-05, + "loss": 2.7309, + "step": 243200 + }, + { + "epoch": 6.23, + "learning_rate": 2.9224305769033716e-05, + "loss": 2.5739, + "step": 243300 + }, + { + "epoch": 6.24, + "learning_rate": 2.9215766642757114e-05, + "loss": 2.6963, + "step": 243400 + }, + { + "epoch": 6.24, + "learning_rate": 2.920722751648052e-05, + "loss": 2.8438, + "step": 243500 + }, + { + "epoch": 6.24, + "learning_rate": 2.9198688390203914e-05, + "loss": 2.5773, + "step": 243600 + }, + { + "epoch": 6.24, + "learning_rate": 2.919014926392732e-05, + "loss": 2.6294, + "step": 243700 + }, + { + "epoch": 6.25, + "learning_rate": 2.9181610137650717e-05, + "loss": 2.5419, + "step": 243800 + }, + { + "epoch": 6.25, + "learning_rate": 2.917307101137412e-05, + "loss": 2.6182, + "step": 243900 + }, + { + "epoch": 6.25, + "learning_rate": 2.9164531885097517e-05, + "loss": 2.6046, + "step": 244000 + }, + { + "epoch": 6.25, + "learning_rate": 2.915599275882092e-05, + "loss": 2.6727, + "step": 244100 + }, + { + "epoch": 6.26, + "learning_rate": 2.914745363254432e-05, + "loss": 2.4974, + "step": 244200 + }, + { + "epoch": 6.26, + "learning_rate": 2.913891450626772e-05, + "loss": 2.6973, + "step": 244300 + }, + { + "epoch": 6.26, + "learning_rate": 2.913037537999112e-05, + "loss": 2.7346, + "step": 244400 + }, + { + "epoch": 6.26, + "learning_rate": 2.9121836253714524e-05, + "loss": 2.6768, + "step": 244500 + }, + { + "epoch": 6.27, + "learning_rate": 2.9113297127437923e-05, + "loss": 2.6179, + "step": 244600 + }, + { + "epoch": 6.27, + "learning_rate": 2.9104758001161324e-05, + "loss": 2.651, + "step": 244700 + }, + { + "epoch": 6.27, + "learning_rate": 2.9096218874884722e-05, + "loss": 2.6308, + "step": 244800 + }, + { + "epoch": 6.27, + "learning_rate": 2.9087679748608127e-05, + "loss": 2.9426, + "step": 244900 + }, + { + "epoch": 6.28, + "learning_rate": 2.9079140622331525e-05, + "loss": 2.8143, + "step": 245000 + }, + { + "epoch": 6.28, + "learning_rate": 2.9070601496054927e-05, + "loss": 2.5903, + "step": 245100 + }, + { + "epoch": 6.28, + "learning_rate": 2.9062062369778325e-05, + "loss": 2.7973, + "step": 245200 + }, + { + "epoch": 6.28, + "learning_rate": 2.905352324350173e-05, + "loss": 2.486, + "step": 245300 + }, + { + "epoch": 6.29, + "learning_rate": 2.9044984117225128e-05, + "loss": 2.7287, + "step": 245400 + }, + { + "epoch": 6.29, + "learning_rate": 2.9036444990948526e-05, + "loss": 2.7824, + "step": 245500 + }, + { + "epoch": 6.29, + "learning_rate": 2.9027905864671928e-05, + "loss": 2.6783, + "step": 245600 + }, + { + "epoch": 6.29, + "learning_rate": 2.9019366738395326e-05, + "loss": 2.7329, + "step": 245700 + }, + { + "epoch": 6.3, + "learning_rate": 2.901082761211873e-05, + "loss": 2.5198, + "step": 245800 + }, + { + "epoch": 6.3, + "learning_rate": 2.9002288485842126e-05, + "loss": 2.7395, + "step": 245900 + }, + { + "epoch": 6.3, + "learning_rate": 2.899374935956553e-05, + "loss": 2.7563, + "step": 246000 + }, + { + "epoch": 6.3, + "learning_rate": 2.898521023328893e-05, + "loss": 2.6676, + "step": 246100 + }, + { + "epoch": 6.31, + "learning_rate": 2.897667110701233e-05, + "loss": 2.6968, + "step": 246200 + }, + { + "epoch": 6.31, + "learning_rate": 2.896813198073573e-05, + "loss": 2.7464, + "step": 246300 + }, + { + "epoch": 6.31, + "learning_rate": 2.8959592854459134e-05, + "loss": 2.6647, + "step": 246400 + }, + { + "epoch": 6.31, + "learning_rate": 2.8951053728182532e-05, + "loss": 2.8056, + "step": 246500 + }, + { + "epoch": 6.32, + "learning_rate": 2.8942514601905933e-05, + "loss": 2.7379, + "step": 246600 + }, + { + "epoch": 6.32, + "learning_rate": 2.893397547562933e-05, + "loss": 2.6462, + "step": 246700 + }, + { + "epoch": 6.32, + "learning_rate": 2.8925436349352736e-05, + "loss": 2.7145, + "step": 246800 + }, + { + "epoch": 6.32, + "learning_rate": 2.8916897223076135e-05, + "loss": 2.7956, + "step": 246900 + }, + { + "epoch": 6.33, + "learning_rate": 2.8908358096799536e-05, + "loss": 2.8627, + "step": 247000 + }, + { + "epoch": 6.33, + "learning_rate": 2.8899818970522934e-05, + "loss": 2.736, + "step": 247100 + }, + { + "epoch": 6.33, + "learning_rate": 2.889127984424634e-05, + "loss": 2.6867, + "step": 247200 + }, + { + "epoch": 6.34, + "learning_rate": 2.8882740717969737e-05, + "loss": 2.7596, + "step": 247300 + }, + { + "epoch": 6.34, + "learning_rate": 2.887420159169314e-05, + "loss": 2.6373, + "step": 247400 + }, + { + "epoch": 6.34, + "learning_rate": 2.8865662465416537e-05, + "loss": 2.5411, + "step": 247500 + }, + { + "epoch": 6.34, + "learning_rate": 2.8857123339139942e-05, + "loss": 2.7351, + "step": 247600 + }, + { + "epoch": 6.35, + "learning_rate": 2.884858421286334e-05, + "loss": 2.6872, + "step": 247700 + }, + { + "epoch": 6.35, + "learning_rate": 2.8840045086586742e-05, + "loss": 2.6734, + "step": 247800 + }, + { + "epoch": 6.35, + "learning_rate": 2.883150596031014e-05, + "loss": 2.681, + "step": 247900 + }, + { + "epoch": 6.35, + "learning_rate": 2.8822966834033545e-05, + "loss": 2.5425, + "step": 248000 + }, + { + "epoch": 6.36, + "learning_rate": 2.8814427707756943e-05, + "loss": 2.7357, + "step": 248100 + }, + { + "epoch": 6.36, + "learning_rate": 2.8805888581480345e-05, + "loss": 2.6655, + "step": 248200 + }, + { + "epoch": 6.36, + "learning_rate": 2.8797349455203743e-05, + "loss": 2.7599, + "step": 248300 + }, + { + "epoch": 6.36, + "learning_rate": 2.8788810328927148e-05, + "loss": 2.4836, + "step": 248400 + }, + { + "epoch": 6.37, + "learning_rate": 2.8780271202650542e-05, + "loss": 2.4861, + "step": 248500 + }, + { + "epoch": 6.37, + "learning_rate": 2.8771732076373947e-05, + "loss": 2.5584, + "step": 248600 + }, + { + "epoch": 6.37, + "learning_rate": 2.8763192950097345e-05, + "loss": 2.6304, + "step": 248700 + }, + { + "epoch": 6.37, + "learning_rate": 2.8754653823820747e-05, + "loss": 2.534, + "step": 248800 + }, + { + "epoch": 6.38, + "learning_rate": 2.8746114697544145e-05, + "loss": 2.6754, + "step": 248900 + }, + { + "epoch": 6.38, + "learning_rate": 2.873757557126755e-05, + "loss": 2.763, + "step": 249000 + }, + { + "epoch": 6.38, + "learning_rate": 2.8729036444990948e-05, + "loss": 2.7081, + "step": 249100 + }, + { + "epoch": 6.38, + "learning_rate": 2.872049731871435e-05, + "loss": 2.7015, + "step": 249200 + }, + { + "epoch": 6.39, + "learning_rate": 2.8711958192437748e-05, + "loss": 2.5221, + "step": 249300 + }, + { + "epoch": 6.39, + "learning_rate": 2.8703419066161153e-05, + "loss": 2.7446, + "step": 249400 + }, + { + "epoch": 6.39, + "learning_rate": 2.869487993988455e-05, + "loss": 2.7252, + "step": 249500 + }, + { + "epoch": 6.39, + "learning_rate": 2.8686340813607953e-05, + "loss": 2.7931, + "step": 249600 + }, + { + "epoch": 6.4, + "learning_rate": 2.867780168733135e-05, + "loss": 2.5381, + "step": 249700 + }, + { + "epoch": 6.4, + "learning_rate": 2.8669262561054756e-05, + "loss": 2.6714, + "step": 249800 + }, + { + "epoch": 6.4, + "learning_rate": 2.8660723434778154e-05, + "loss": 2.5294, + "step": 249900 + }, + { + "epoch": 6.4, + "learning_rate": 2.8652184308501555e-05, + "loss": 2.6885, + "step": 250000 + }, + { + "epoch": 6.41, + "learning_rate": 2.8643645182224954e-05, + "loss": 2.7648, + "step": 250100 + }, + { + "epoch": 6.41, + "learning_rate": 2.863510605594836e-05, + "loss": 2.6819, + "step": 250200 + }, + { + "epoch": 6.41, + "learning_rate": 2.8626566929671757e-05, + "loss": 2.5423, + "step": 250300 + }, + { + "epoch": 6.41, + "learning_rate": 2.8618027803395158e-05, + "loss": 2.5962, + "step": 250400 + }, + { + "epoch": 6.42, + "learning_rate": 2.8609488677118556e-05, + "loss": 2.5923, + "step": 250500 + }, + { + "epoch": 6.42, + "learning_rate": 2.860094955084196e-05, + "loss": 2.6878, + "step": 250600 + }, + { + "epoch": 6.42, + "learning_rate": 2.859241042456536e-05, + "loss": 2.7258, + "step": 250700 + }, + { + "epoch": 6.42, + "learning_rate": 2.858387129828876e-05, + "loss": 2.7176, + "step": 250800 + }, + { + "epoch": 6.43, + "learning_rate": 2.857533217201216e-05, + "loss": 2.7349, + "step": 250900 + }, + { + "epoch": 6.43, + "learning_rate": 2.8566793045735564e-05, + "loss": 2.5558, + "step": 251000 + }, + { + "epoch": 6.43, + "learning_rate": 2.855825391945896e-05, + "loss": 2.6889, + "step": 251100 + }, + { + "epoch": 6.44, + "learning_rate": 2.8549714793182364e-05, + "loss": 2.7017, + "step": 251200 + }, + { + "epoch": 6.44, + "learning_rate": 2.8541175666905762e-05, + "loss": 2.7386, + "step": 251300 + }, + { + "epoch": 6.44, + "learning_rate": 2.8532636540629164e-05, + "loss": 2.686, + "step": 251400 + }, + { + "epoch": 6.44, + "learning_rate": 2.8524097414352562e-05, + "loss": 2.8088, + "step": 251500 + }, + { + "epoch": 6.45, + "learning_rate": 2.8515558288075967e-05, + "loss": 2.6469, + "step": 251600 + }, + { + "epoch": 6.45, + "learning_rate": 2.8507019161799365e-05, + "loss": 2.7537, + "step": 251700 + }, + { + "epoch": 6.45, + "learning_rate": 2.8498480035522766e-05, + "loss": 2.7216, + "step": 251800 + }, + { + "epoch": 6.45, + "learning_rate": 2.8489940909246165e-05, + "loss": 2.6939, + "step": 251900 + }, + { + "epoch": 6.46, + "learning_rate": 2.848140178296957e-05, + "loss": 2.5937, + "step": 252000 + }, + { + "epoch": 6.46, + "learning_rate": 2.8472862656692968e-05, + "loss": 2.7772, + "step": 252100 + }, + { + "epoch": 6.46, + "learning_rate": 2.846432353041637e-05, + "loss": 2.5186, + "step": 252200 + }, + { + "epoch": 6.46, + "learning_rate": 2.8455784404139767e-05, + "loss": 2.5647, + "step": 252300 + }, + { + "epoch": 6.47, + "learning_rate": 2.8447245277863172e-05, + "loss": 2.7313, + "step": 252400 + }, + { + "epoch": 6.47, + "learning_rate": 2.843870615158657e-05, + "loss": 2.8239, + "step": 252500 + }, + { + "epoch": 6.47, + "learning_rate": 2.8430167025309972e-05, + "loss": 2.6443, + "step": 252600 + }, + { + "epoch": 6.47, + "learning_rate": 2.842162789903337e-05, + "loss": 2.6207, + "step": 252700 + }, + { + "epoch": 6.48, + "learning_rate": 2.8413088772756775e-05, + "loss": 2.5298, + "step": 252800 + }, + { + "epoch": 6.48, + "learning_rate": 2.8404549646480173e-05, + "loss": 2.7086, + "step": 252900 + }, + { + "epoch": 6.48, + "learning_rate": 2.8396010520203575e-05, + "loss": 2.7574, + "step": 253000 + }, + { + "epoch": 6.48, + "learning_rate": 2.8387471393926973e-05, + "loss": 2.7623, + "step": 253100 + }, + { + "epoch": 6.49, + "learning_rate": 2.8378932267650378e-05, + "loss": 2.5917, + "step": 253200 + }, + { + "epoch": 6.49, + "learning_rate": 2.8370393141373776e-05, + "loss": 2.3612, + "step": 253300 + }, + { + "epoch": 6.49, + "learning_rate": 2.8361854015097178e-05, + "loss": 2.4796, + "step": 253400 + }, + { + "epoch": 6.49, + "learning_rate": 2.8353314888820576e-05, + "loss": 2.4856, + "step": 253500 + }, + { + "epoch": 6.5, + "learning_rate": 2.834477576254398e-05, + "loss": 2.5118, + "step": 253600 + }, + { + "epoch": 6.5, + "learning_rate": 2.8336236636267375e-05, + "loss": 2.663, + "step": 253700 + }, + { + "epoch": 6.5, + "learning_rate": 2.832769750999078e-05, + "loss": 2.6695, + "step": 253800 + }, + { + "epoch": 6.5, + "learning_rate": 2.831915838371418e-05, + "loss": 2.4447, + "step": 253900 + }, + { + "epoch": 6.51, + "learning_rate": 2.831061925743758e-05, + "loss": 2.6524, + "step": 254000 + }, + { + "epoch": 6.51, + "learning_rate": 2.8302080131160978e-05, + "loss": 2.6497, + "step": 254100 + }, + { + "epoch": 6.51, + "learning_rate": 2.8293541004884383e-05, + "loss": 2.6326, + "step": 254200 + }, + { + "epoch": 6.51, + "learning_rate": 2.828500187860778e-05, + "loss": 2.6074, + "step": 254300 + }, + { + "epoch": 6.52, + "learning_rate": 2.8276462752331183e-05, + "loss": 2.4123, + "step": 254400 + }, + { + "epoch": 6.52, + "learning_rate": 2.826792362605458e-05, + "loss": 2.6305, + "step": 254500 + }, + { + "epoch": 6.52, + "learning_rate": 2.8259384499777986e-05, + "loss": 2.5838, + "step": 254600 + }, + { + "epoch": 6.52, + "learning_rate": 2.8250845373501384e-05, + "loss": 2.6634, + "step": 254700 + }, + { + "epoch": 6.53, + "learning_rate": 2.8242306247224786e-05, + "loss": 2.66, + "step": 254800 + }, + { + "epoch": 6.53, + "learning_rate": 2.8233767120948184e-05, + "loss": 2.7131, + "step": 254900 + }, + { + "epoch": 6.53, + "learning_rate": 2.822522799467159e-05, + "loss": 2.5279, + "step": 255000 + }, + { + "epoch": 6.53, + "learning_rate": 2.8216688868394987e-05, + "loss": 2.6542, + "step": 255100 + }, + { + "epoch": 6.54, + "learning_rate": 2.820814974211839e-05, + "loss": 2.4608, + "step": 255200 + }, + { + "epoch": 6.54, + "learning_rate": 2.8199610615841787e-05, + "loss": 2.6652, + "step": 255300 + }, + { + "epoch": 6.54, + "learning_rate": 2.819107148956519e-05, + "loss": 2.5986, + "step": 255400 + }, + { + "epoch": 6.55, + "learning_rate": 2.818253236328859e-05, + "loss": 2.6741, + "step": 255500 + }, + { + "epoch": 6.55, + "learning_rate": 2.817399323701199e-05, + "loss": 2.7155, + "step": 255600 + }, + { + "epoch": 6.55, + "learning_rate": 2.816545411073539e-05, + "loss": 2.577, + "step": 255700 + }, + { + "epoch": 6.55, + "learning_rate": 2.8156914984458794e-05, + "loss": 2.4684, + "step": 255800 + }, + { + "epoch": 6.56, + "learning_rate": 2.8148375858182193e-05, + "loss": 2.631, + "step": 255900 + }, + { + "epoch": 6.56, + "learning_rate": 2.8139836731905594e-05, + "loss": 2.8226, + "step": 256000 + }, + { + "epoch": 6.56, + "learning_rate": 2.8131297605628992e-05, + "loss": 2.7944, + "step": 256100 + }, + { + "epoch": 6.56, + "learning_rate": 2.8122758479352397e-05, + "loss": 2.5859, + "step": 256200 + }, + { + "epoch": 6.57, + "learning_rate": 2.8114219353075792e-05, + "loss": 2.6318, + "step": 256300 + }, + { + "epoch": 6.57, + "learning_rate": 2.8105680226799197e-05, + "loss": 2.6843, + "step": 256400 + }, + { + "epoch": 6.57, + "learning_rate": 2.8097141100522595e-05, + "loss": 2.6963, + "step": 256500 + }, + { + "epoch": 6.57, + "learning_rate": 2.8088601974245997e-05, + "loss": 2.6305, + "step": 256600 + }, + { + "epoch": 6.58, + "learning_rate": 2.8080062847969395e-05, + "loss": 2.6668, + "step": 256700 + }, + { + "epoch": 6.58, + "learning_rate": 2.80715237216928e-05, + "loss": 2.537, + "step": 256800 + }, + { + "epoch": 6.58, + "learning_rate": 2.8062984595416198e-05, + "loss": 2.7308, + "step": 256900 + }, + { + "epoch": 6.58, + "learning_rate": 2.80544454691396e-05, + "loss": 2.7153, + "step": 257000 + }, + { + "epoch": 6.59, + "learning_rate": 2.8045906342862998e-05, + "loss": 2.6395, + "step": 257100 + }, + { + "epoch": 6.59, + "learning_rate": 2.8037367216586403e-05, + "loss": 2.4912, + "step": 257200 + }, + { + "epoch": 6.59, + "learning_rate": 2.80288280903098e-05, + "loss": 2.6566, + "step": 257300 + }, + { + "epoch": 6.59, + "learning_rate": 2.8020288964033202e-05, + "loss": 2.5217, + "step": 257400 + }, + { + "epoch": 6.6, + "learning_rate": 2.80117498377566e-05, + "loss": 2.7992, + "step": 257500 + }, + { + "epoch": 6.6, + "learning_rate": 2.8003210711480005e-05, + "loss": 2.7055, + "step": 257600 + }, + { + "epoch": 6.6, + "learning_rate": 2.7994671585203404e-05, + "loss": 2.5757, + "step": 257700 + }, + { + "epoch": 6.6, + "learning_rate": 2.7986132458926805e-05, + "loss": 2.8099, + "step": 257800 + }, + { + "epoch": 6.61, + "learning_rate": 2.7977593332650203e-05, + "loss": 2.5271, + "step": 257900 + }, + { + "epoch": 6.61, + "learning_rate": 2.7969054206373608e-05, + "loss": 2.661, + "step": 258000 + }, + { + "epoch": 6.61, + "learning_rate": 2.7960515080097006e-05, + "loss": 2.6143, + "step": 258100 + }, + { + "epoch": 6.61, + "learning_rate": 2.7951975953820408e-05, + "loss": 2.5266, + "step": 258200 + }, + { + "epoch": 6.62, + "learning_rate": 2.7943436827543806e-05, + "loss": 2.6032, + "step": 258300 + }, + { + "epoch": 6.62, + "learning_rate": 2.793489770126721e-05, + "loss": 2.5501, + "step": 258400 + }, + { + "epoch": 6.62, + "learning_rate": 2.792635857499061e-05, + "loss": 2.6541, + "step": 258500 + }, + { + "epoch": 6.62, + "learning_rate": 2.791781944871401e-05, + "loss": 2.6406, + "step": 258600 + }, + { + "epoch": 6.63, + "learning_rate": 2.790928032243741e-05, + "loss": 2.5852, + "step": 258700 + }, + { + "epoch": 6.63, + "learning_rate": 2.7900741196160814e-05, + "loss": 2.6384, + "step": 258800 + }, + { + "epoch": 6.63, + "learning_rate": 2.789220206988421e-05, + "loss": 2.5915, + "step": 258900 + }, + { + "epoch": 6.63, + "learning_rate": 2.7883662943607613e-05, + "loss": 2.6916, + "step": 259000 + }, + { + "epoch": 6.64, + "learning_rate": 2.787512381733101e-05, + "loss": 2.6679, + "step": 259100 + }, + { + "epoch": 6.64, + "learning_rate": 2.7866584691054413e-05, + "loss": 2.732, + "step": 259200 + }, + { + "epoch": 6.64, + "learning_rate": 2.785804556477781e-05, + "loss": 2.7541, + "step": 259300 + }, + { + "epoch": 6.65, + "learning_rate": 2.7849506438501216e-05, + "loss": 2.4927, + "step": 259400 + }, + { + "epoch": 6.65, + "learning_rate": 2.7840967312224614e-05, + "loss": 2.5912, + "step": 259500 + }, + { + "epoch": 6.65, + "learning_rate": 2.7832428185948016e-05, + "loss": 2.5699, + "step": 259600 + }, + { + "epoch": 6.65, + "learning_rate": 2.7823889059671414e-05, + "loss": 2.6161, + "step": 259700 + }, + { + "epoch": 6.66, + "learning_rate": 2.781534993339482e-05, + "loss": 2.6555, + "step": 259800 + }, + { + "epoch": 6.66, + "learning_rate": 2.7806810807118217e-05, + "loss": 2.586, + "step": 259900 + }, + { + "epoch": 6.66, + "learning_rate": 2.779827168084162e-05, + "loss": 2.817, + "step": 260000 + }, + { + "epoch": 6.66, + "learning_rate": 2.7789732554565017e-05, + "loss": 2.7655, + "step": 260100 + }, + { + "epoch": 6.67, + "learning_rate": 2.7781193428288422e-05, + "loss": 2.6764, + "step": 260200 + }, + { + "epoch": 6.67, + "learning_rate": 2.777265430201182e-05, + "loss": 2.7336, + "step": 260300 + }, + { + "epoch": 6.67, + "learning_rate": 2.776411517573522e-05, + "loss": 2.7111, + "step": 260400 + }, + { + "epoch": 6.67, + "learning_rate": 2.775557604945862e-05, + "loss": 2.6737, + "step": 260500 + }, + { + "epoch": 6.68, + "learning_rate": 2.7747036923182025e-05, + "loss": 2.6634, + "step": 260600 + }, + { + "epoch": 6.68, + "learning_rate": 2.7738497796905423e-05, + "loss": 2.6087, + "step": 260700 + }, + { + "epoch": 6.68, + "learning_rate": 2.7729958670628824e-05, + "loss": 2.5933, + "step": 260800 + }, + { + "epoch": 6.68, + "learning_rate": 2.7721419544352223e-05, + "loss": 2.5006, + "step": 260900 + }, + { + "epoch": 6.69, + "learning_rate": 2.7712880418075627e-05, + "loss": 2.6066, + "step": 261000 + }, + { + "epoch": 6.69, + "learning_rate": 2.7704341291799026e-05, + "loss": 2.7219, + "step": 261100 + }, + { + "epoch": 6.69, + "learning_rate": 2.7695802165522427e-05, + "loss": 2.8082, + "step": 261200 + }, + { + "epoch": 6.69, + "learning_rate": 2.7687263039245825e-05, + "loss": 2.4713, + "step": 261300 + }, + { + "epoch": 6.7, + "learning_rate": 2.767872391296923e-05, + "loss": 2.5928, + "step": 261400 + }, + { + "epoch": 6.7, + "learning_rate": 2.7670184786692625e-05, + "loss": 2.4251, + "step": 261500 + }, + { + "epoch": 6.7, + "learning_rate": 2.766164566041603e-05, + "loss": 2.4727, + "step": 261600 + }, + { + "epoch": 6.7, + "learning_rate": 2.7653106534139428e-05, + "loss": 2.6667, + "step": 261700 + }, + { + "epoch": 6.71, + "learning_rate": 2.764456740786283e-05, + "loss": 2.6748, + "step": 261800 + }, + { + "epoch": 6.71, + "learning_rate": 2.7636028281586228e-05, + "loss": 2.7072, + "step": 261900 + }, + { + "epoch": 6.71, + "learning_rate": 2.7627489155309633e-05, + "loss": 2.6406, + "step": 262000 + }, + { + "epoch": 6.71, + "learning_rate": 2.761895002903303e-05, + "loss": 2.766, + "step": 262100 + }, + { + "epoch": 6.72, + "learning_rate": 2.7610410902756433e-05, + "loss": 2.6034, + "step": 262200 + }, + { + "epoch": 6.72, + "learning_rate": 2.760187177647983e-05, + "loss": 2.6623, + "step": 262300 + }, + { + "epoch": 6.72, + "learning_rate": 2.7593332650203236e-05, + "loss": 2.7657, + "step": 262400 + }, + { + "epoch": 6.72, + "learning_rate": 2.7584793523926634e-05, + "loss": 2.6565, + "step": 262500 + }, + { + "epoch": 6.73, + "learning_rate": 2.7576254397650035e-05, + "loss": 2.4814, + "step": 262600 + }, + { + "epoch": 6.73, + "learning_rate": 2.7567715271373433e-05, + "loss": 2.6561, + "step": 262700 + }, + { + "epoch": 6.73, + "learning_rate": 2.755917614509684e-05, + "loss": 2.7845, + "step": 262800 + }, + { + "epoch": 6.73, + "learning_rate": 2.7550637018820237e-05, + "loss": 2.7197, + "step": 262900 + }, + { + "epoch": 6.74, + "learning_rate": 2.7542097892543635e-05, + "loss": 2.4288, + "step": 263000 + }, + { + "epoch": 6.74, + "learning_rate": 2.7533558766267036e-05, + "loss": 2.6412, + "step": 263100 + }, + { + "epoch": 6.74, + "learning_rate": 2.7525019639990434e-05, + "loss": 2.6682, + "step": 263200 + }, + { + "epoch": 6.75, + "learning_rate": 2.751648051371384e-05, + "loss": 2.6347, + "step": 263300 + }, + { + "epoch": 6.75, + "learning_rate": 2.7507941387437238e-05, + "loss": 2.6485, + "step": 263400 + }, + { + "epoch": 6.75, + "learning_rate": 2.749940226116064e-05, + "loss": 2.7453, + "step": 263500 + }, + { + "epoch": 6.75, + "learning_rate": 2.7490863134884037e-05, + "loss": 2.5326, + "step": 263600 + }, + { + "epoch": 6.76, + "learning_rate": 2.7482324008607442e-05, + "loss": 2.7145, + "step": 263700 + }, + { + "epoch": 6.76, + "learning_rate": 2.7473784882330837e-05, + "loss": 2.6467, + "step": 263800 + }, + { + "epoch": 6.76, + "learning_rate": 2.7465245756054242e-05, + "loss": 2.7114, + "step": 263900 + }, + { + "epoch": 6.76, + "learning_rate": 2.745670662977764e-05, + "loss": 2.6683, + "step": 264000 + }, + { + "epoch": 6.77, + "learning_rate": 2.744816750350104e-05, + "loss": 2.5731, + "step": 264100 + }, + { + "epoch": 6.77, + "learning_rate": 2.743962837722444e-05, + "loss": 2.7838, + "step": 264200 + }, + { + "epoch": 6.77, + "learning_rate": 2.7431089250947845e-05, + "loss": 2.6185, + "step": 264300 + }, + { + "epoch": 6.77, + "learning_rate": 2.7422550124671243e-05, + "loss": 2.6239, + "step": 264400 + }, + { + "epoch": 6.78, + "learning_rate": 2.7414010998394644e-05, + "loss": 2.5764, + "step": 264500 + }, + { + "epoch": 6.78, + "learning_rate": 2.7405471872118043e-05, + "loss": 2.6734, + "step": 264600 + }, + { + "epoch": 6.78, + "learning_rate": 2.7396932745841448e-05, + "loss": 2.7873, + "step": 264700 + }, + { + "epoch": 6.78, + "learning_rate": 2.7388393619564846e-05, + "loss": 2.5816, + "step": 264800 + }, + { + "epoch": 6.79, + "learning_rate": 2.7379854493288247e-05, + "loss": 2.5118, + "step": 264900 + }, + { + "epoch": 6.79, + "learning_rate": 2.7371315367011645e-05, + "loss": 2.613, + "step": 265000 + }, + { + "epoch": 6.79, + "learning_rate": 2.736277624073505e-05, + "loss": 2.6056, + "step": 265100 + }, + { + "epoch": 6.79, + "learning_rate": 2.735423711445845e-05, + "loss": 2.6834, + "step": 265200 + }, + { + "epoch": 6.8, + "learning_rate": 2.734569798818185e-05, + "loss": 2.6059, + "step": 265300 + }, + { + "epoch": 6.8, + "learning_rate": 2.7337158861905248e-05, + "loss": 2.7789, + "step": 265400 + }, + { + "epoch": 6.8, + "learning_rate": 2.7328619735628653e-05, + "loss": 2.679, + "step": 265500 + }, + { + "epoch": 6.8, + "learning_rate": 2.732008060935205e-05, + "loss": 2.7703, + "step": 265600 + }, + { + "epoch": 6.81, + "learning_rate": 2.7311541483075453e-05, + "loss": 2.6233, + "step": 265700 + }, + { + "epoch": 6.81, + "learning_rate": 2.730300235679885e-05, + "loss": 2.7459, + "step": 265800 + }, + { + "epoch": 6.81, + "learning_rate": 2.7294463230522256e-05, + "loss": 2.5026, + "step": 265900 + }, + { + "epoch": 6.81, + "learning_rate": 2.7285924104245654e-05, + "loss": 2.6283, + "step": 266000 + }, + { + "epoch": 6.82, + "learning_rate": 2.7277384977969056e-05, + "loss": 2.621, + "step": 266100 + }, + { + "epoch": 6.82, + "learning_rate": 2.7268845851692454e-05, + "loss": 2.7844, + "step": 266200 + }, + { + "epoch": 6.82, + "learning_rate": 2.726030672541586e-05, + "loss": 2.895, + "step": 266300 + }, + { + "epoch": 6.82, + "learning_rate": 2.7251767599139253e-05, + "loss": 2.4861, + "step": 266400 + }, + { + "epoch": 6.83, + "learning_rate": 2.724322847286266e-05, + "loss": 2.6531, + "step": 266500 + }, + { + "epoch": 6.83, + "learning_rate": 2.7234689346586057e-05, + "loss": 2.6349, + "step": 266600 + }, + { + "epoch": 6.83, + "learning_rate": 2.722615022030946e-05, + "loss": 2.6672, + "step": 266700 + }, + { + "epoch": 6.83, + "learning_rate": 2.7217611094032856e-05, + "loss": 2.5503, + "step": 266800 + }, + { + "epoch": 6.84, + "learning_rate": 2.720907196775626e-05, + "loss": 2.4387, + "step": 266900 + }, + { + "epoch": 6.84, + "learning_rate": 2.720053284147966e-05, + "loss": 2.629, + "step": 267000 + }, + { + "epoch": 6.84, + "learning_rate": 2.719199371520306e-05, + "loss": 2.7743, + "step": 267100 + }, + { + "epoch": 6.84, + "learning_rate": 2.718345458892646e-05, + "loss": 2.6763, + "step": 267200 + }, + { + "epoch": 6.85, + "learning_rate": 2.7174915462649864e-05, + "loss": 2.6692, + "step": 267300 + }, + { + "epoch": 6.85, + "learning_rate": 2.7166376336373262e-05, + "loss": 2.6085, + "step": 267400 + }, + { + "epoch": 6.85, + "learning_rate": 2.7157837210096664e-05, + "loss": 2.8022, + "step": 267500 + }, + { + "epoch": 6.86, + "learning_rate": 2.7149298083820062e-05, + "loss": 2.4493, + "step": 267600 + }, + { + "epoch": 6.86, + "learning_rate": 2.7140758957543467e-05, + "loss": 2.4776, + "step": 267700 + }, + { + "epoch": 6.86, + "learning_rate": 2.7132219831266865e-05, + "loss": 2.5499, + "step": 267800 + }, + { + "epoch": 6.86, + "learning_rate": 2.7123680704990267e-05, + "loss": 2.6288, + "step": 267900 + }, + { + "epoch": 6.87, + "learning_rate": 2.7115141578713665e-05, + "loss": 2.6296, + "step": 268000 + }, + { + "epoch": 6.87, + "learning_rate": 2.710660245243707e-05, + "loss": 2.6508, + "step": 268100 + }, + { + "epoch": 6.87, + "learning_rate": 2.7098063326160468e-05, + "loss": 2.4546, + "step": 268200 + }, + { + "epoch": 6.87, + "learning_rate": 2.708952419988387e-05, + "loss": 2.6228, + "step": 268300 + }, + { + "epoch": 6.88, + "learning_rate": 2.7080985073607268e-05, + "loss": 2.6905, + "step": 268400 + }, + { + "epoch": 6.88, + "learning_rate": 2.7072445947330672e-05, + "loss": 2.5638, + "step": 268500 + }, + { + "epoch": 6.88, + "learning_rate": 2.706390682105407e-05, + "loss": 2.5877, + "step": 268600 + }, + { + "epoch": 6.88, + "learning_rate": 2.7055367694777472e-05, + "loss": 2.7355, + "step": 268700 + }, + { + "epoch": 6.89, + "learning_rate": 2.704682856850087e-05, + "loss": 2.5392, + "step": 268800 + }, + { + "epoch": 6.89, + "learning_rate": 2.7038289442224275e-05, + "loss": 2.564, + "step": 268900 + }, + { + "epoch": 6.89, + "learning_rate": 2.702975031594767e-05, + "loss": 2.7087, + "step": 269000 + }, + { + "epoch": 6.89, + "learning_rate": 2.7021211189671075e-05, + "loss": 2.7114, + "step": 269100 + }, + { + "epoch": 6.9, + "learning_rate": 2.7012672063394473e-05, + "loss": 2.7633, + "step": 269200 + }, + { + "epoch": 6.9, + "learning_rate": 2.7004132937117878e-05, + "loss": 2.6081, + "step": 269300 + }, + { + "epoch": 6.9, + "learning_rate": 2.6995593810841273e-05, + "loss": 2.5344, + "step": 269400 + }, + { + "epoch": 6.9, + "learning_rate": 2.6987054684564678e-05, + "loss": 2.7626, + "step": 269500 + }, + { + "epoch": 6.91, + "learning_rate": 2.6978515558288076e-05, + "loss": 2.5426, + "step": 269600 + }, + { + "epoch": 6.91, + "learning_rate": 2.6969976432011477e-05, + "loss": 2.7273, + "step": 269700 + }, + { + "epoch": 6.91, + "learning_rate": 2.6961437305734876e-05, + "loss": 2.5759, + "step": 269800 + }, + { + "epoch": 6.91, + "learning_rate": 2.695289817945828e-05, + "loss": 2.5017, + "step": 269900 + }, + { + "epoch": 6.92, + "learning_rate": 2.694435905318168e-05, + "loss": 2.592, + "step": 270000 + }, + { + "epoch": 6.92, + "learning_rate": 2.693581992690508e-05, + "loss": 2.4799, + "step": 270100 + }, + { + "epoch": 6.92, + "learning_rate": 2.692728080062848e-05, + "loss": 2.6806, + "step": 270200 + }, + { + "epoch": 6.92, + "learning_rate": 2.6918741674351883e-05, + "loss": 2.6202, + "step": 270300 + }, + { + "epoch": 6.93, + "learning_rate": 2.691020254807528e-05, + "loss": 2.5736, + "step": 270400 + }, + { + "epoch": 6.93, + "learning_rate": 2.6901663421798683e-05, + "loss": 2.5092, + "step": 270500 + }, + { + "epoch": 6.93, + "learning_rate": 2.689312429552208e-05, + "loss": 2.5518, + "step": 270600 + }, + { + "epoch": 6.93, + "learning_rate": 2.6884585169245486e-05, + "loss": 2.7833, + "step": 270700 + }, + { + "epoch": 6.94, + "learning_rate": 2.6876046042968884e-05, + "loss": 2.4738, + "step": 270800 + }, + { + "epoch": 6.94, + "learning_rate": 2.6867506916692286e-05, + "loss": 2.6425, + "step": 270900 + }, + { + "epoch": 6.94, + "learning_rate": 2.6858967790415684e-05, + "loss": 2.7705, + "step": 271000 + }, + { + "epoch": 6.94, + "learning_rate": 2.685042866413909e-05, + "loss": 2.5649, + "step": 271100 + }, + { + "epoch": 6.95, + "learning_rate": 2.6841889537862487e-05, + "loss": 2.6953, + "step": 271200 + }, + { + "epoch": 6.95, + "learning_rate": 2.683335041158589e-05, + "loss": 2.5427, + "step": 271300 + }, + { + "epoch": 6.95, + "learning_rate": 2.6824811285309287e-05, + "loss": 2.7958, + "step": 271400 + }, + { + "epoch": 6.96, + "learning_rate": 2.6816272159032692e-05, + "loss": 2.6604, + "step": 271500 + }, + { + "epoch": 6.96, + "learning_rate": 2.680773303275609e-05, + "loss": 2.6293, + "step": 271600 + }, + { + "epoch": 6.96, + "learning_rate": 2.679919390647949e-05, + "loss": 2.6232, + "step": 271700 + }, + { + "epoch": 6.96, + "learning_rate": 2.679065478020289e-05, + "loss": 2.6486, + "step": 271800 + }, + { + "epoch": 6.97, + "learning_rate": 2.6782115653926295e-05, + "loss": 2.5863, + "step": 271900 + }, + { + "epoch": 6.97, + "learning_rate": 2.677357652764969e-05, + "loss": 2.5454, + "step": 272000 + }, + { + "epoch": 6.97, + "learning_rate": 2.6765037401373094e-05, + "loss": 2.5873, + "step": 272100 + }, + { + "epoch": 6.97, + "learning_rate": 2.6756498275096492e-05, + "loss": 2.6361, + "step": 272200 + }, + { + "epoch": 6.98, + "learning_rate": 2.6747959148819894e-05, + "loss": 2.7453, + "step": 272300 + }, + { + "epoch": 6.98, + "learning_rate": 2.6739420022543292e-05, + "loss": 2.6073, + "step": 272400 + }, + { + "epoch": 6.98, + "learning_rate": 2.6730880896266697e-05, + "loss": 2.7198, + "step": 272500 + }, + { + "epoch": 6.98, + "learning_rate": 2.6722341769990095e-05, + "loss": 2.5774, + "step": 272600 + }, + { + "epoch": 6.99, + "learning_rate": 2.6713802643713497e-05, + "loss": 2.6256, + "step": 272700 + }, + { + "epoch": 6.99, + "learning_rate": 2.6705263517436895e-05, + "loss": 2.5441, + "step": 272800 + }, + { + "epoch": 6.99, + "learning_rate": 2.66967243911603e-05, + "loss": 2.5643, + "step": 272900 + }, + { + "epoch": 6.99, + "learning_rate": 2.6688185264883698e-05, + "loss": 2.8956, + "step": 273000 + }, + { + "epoch": 7.0, + "learning_rate": 2.66796461386071e-05, + "loss": 2.5068, + "step": 273100 + }, + { + "epoch": 7.0, + "learning_rate": 2.6671107012330498e-05, + "loss": 2.7411, + "step": 273200 + }, + { + "epoch": 7.0, + "learning_rate": 2.6662567886053903e-05, + "loss": 2.6969, + "step": 273300 + }, + { + "epoch": 7.0, + "learning_rate": 2.66540287597773e-05, + "loss": 2.4939, + "step": 273400 + }, + { + "epoch": 7.01, + "learning_rate": 2.6645489633500702e-05, + "loss": 2.5886, + "step": 273500 + }, + { + "epoch": 7.01, + "learning_rate": 2.66369505072241e-05, + "loss": 2.5141, + "step": 273600 + }, + { + "epoch": 7.01, + "learning_rate": 2.6628411380947506e-05, + "loss": 2.6091, + "step": 273700 + }, + { + "epoch": 7.01, + "learning_rate": 2.6619872254670904e-05, + "loss": 2.5974, + "step": 273800 + }, + { + "epoch": 7.02, + "learning_rate": 2.6611333128394305e-05, + "loss": 2.5129, + "step": 273900 + }, + { + "epoch": 7.02, + "learning_rate": 2.6602794002117703e-05, + "loss": 2.5836, + "step": 274000 + }, + { + "epoch": 7.02, + "learning_rate": 2.659425487584111e-05, + "loss": 2.5638, + "step": 274100 + }, + { + "epoch": 7.02, + "learning_rate": 2.6585715749564506e-05, + "loss": 2.5783, + "step": 274200 + }, + { + "epoch": 7.03, + "learning_rate": 2.6577176623287908e-05, + "loss": 2.6268, + "step": 274300 + }, + { + "epoch": 7.03, + "learning_rate": 2.6568637497011306e-05, + "loss": 2.6178, + "step": 274400 + }, + { + "epoch": 7.03, + "learning_rate": 2.656009837073471e-05, + "loss": 2.5375, + "step": 274500 + }, + { + "epoch": 7.03, + "learning_rate": 2.6551559244458106e-05, + "loss": 2.6121, + "step": 274600 + }, + { + "epoch": 7.04, + "learning_rate": 2.654302011818151e-05, + "loss": 2.5868, + "step": 274700 + }, + { + "epoch": 7.04, + "learning_rate": 2.653448099190491e-05, + "loss": 2.6258, + "step": 274800 + }, + { + "epoch": 7.04, + "learning_rate": 2.652594186562831e-05, + "loss": 2.6168, + "step": 274900 + }, + { + "epoch": 7.04, + "learning_rate": 2.651740273935171e-05, + "loss": 2.6585, + "step": 275000 + }, + { + "epoch": 7.05, + "learning_rate": 2.6508863613075114e-05, + "loss": 2.7558, + "step": 275100 + }, + { + "epoch": 7.05, + "learning_rate": 2.6500324486798512e-05, + "loss": 2.4509, + "step": 275200 + }, + { + "epoch": 7.05, + "learning_rate": 2.6491785360521913e-05, + "loss": 2.6579, + "step": 275300 + }, + { + "epoch": 7.06, + "learning_rate": 2.648324623424531e-05, + "loss": 2.5262, + "step": 275400 + }, + { + "epoch": 7.06, + "learning_rate": 2.6474707107968716e-05, + "loss": 2.5844, + "step": 275500 + }, + { + "epoch": 7.06, + "learning_rate": 2.6466167981692115e-05, + "loss": 2.4258, + "step": 275600 + }, + { + "epoch": 7.06, + "learning_rate": 2.6457628855415516e-05, + "loss": 2.6116, + "step": 275700 + }, + { + "epoch": 7.07, + "learning_rate": 2.6449089729138914e-05, + "loss": 2.4755, + "step": 275800 + }, + { + "epoch": 7.07, + "learning_rate": 2.644055060286232e-05, + "loss": 2.5765, + "step": 275900 + }, + { + "epoch": 7.07, + "learning_rate": 2.6432011476585717e-05, + "loss": 2.5403, + "step": 276000 + }, + { + "epoch": 7.07, + "learning_rate": 2.642347235030912e-05, + "loss": 2.6769, + "step": 276100 + }, + { + "epoch": 7.08, + "learning_rate": 2.6414933224032517e-05, + "loss": 2.4913, + "step": 276200 + }, + { + "epoch": 7.08, + "learning_rate": 2.6406394097755922e-05, + "loss": 2.6235, + "step": 276300 + }, + { + "epoch": 7.08, + "learning_rate": 2.639785497147932e-05, + "loss": 2.6402, + "step": 276400 + }, + { + "epoch": 7.08, + "learning_rate": 2.6389315845202722e-05, + "loss": 2.5187, + "step": 276500 + }, + { + "epoch": 7.09, + "learning_rate": 2.638077671892612e-05, + "loss": 2.7239, + "step": 276600 + }, + { + "epoch": 7.09, + "learning_rate": 2.6372237592649525e-05, + "loss": 2.5616, + "step": 276700 + }, + { + "epoch": 7.09, + "learning_rate": 2.6363698466372923e-05, + "loss": 2.5075, + "step": 276800 + }, + { + "epoch": 7.09, + "learning_rate": 2.6355159340096325e-05, + "loss": 2.6405, + "step": 276900 + }, + { + "epoch": 7.1, + "learning_rate": 2.6346620213819723e-05, + "loss": 2.7607, + "step": 277000 + }, + { + "epoch": 7.1, + "learning_rate": 2.6338081087543128e-05, + "loss": 2.6004, + "step": 277100 + }, + { + "epoch": 7.1, + "learning_rate": 2.6329541961266522e-05, + "loss": 2.3485, + "step": 277200 + }, + { + "epoch": 7.1, + "learning_rate": 2.6321002834989927e-05, + "loss": 2.6571, + "step": 277300 + }, + { + "epoch": 7.11, + "learning_rate": 2.6312463708713326e-05, + "loss": 2.6115, + "step": 277400 + }, + { + "epoch": 7.11, + "learning_rate": 2.6303924582436727e-05, + "loss": 2.6166, + "step": 277500 + }, + { + "epoch": 7.11, + "learning_rate": 2.6295385456160125e-05, + "loss": 2.6621, + "step": 277600 + }, + { + "epoch": 7.11, + "learning_rate": 2.628684632988353e-05, + "loss": 2.652, + "step": 277700 + }, + { + "epoch": 7.12, + "learning_rate": 2.627830720360693e-05, + "loss": 2.667, + "step": 277800 + }, + { + "epoch": 7.12, + "learning_rate": 2.626976807733033e-05, + "loss": 2.6392, + "step": 277900 + }, + { + "epoch": 7.12, + "learning_rate": 2.6261228951053728e-05, + "loss": 2.5462, + "step": 278000 + }, + { + "epoch": 7.12, + "learning_rate": 2.6252689824777133e-05, + "loss": 2.514, + "step": 278100 + }, + { + "epoch": 7.13, + "learning_rate": 2.624415069850053e-05, + "loss": 2.5877, + "step": 278200 + }, + { + "epoch": 7.13, + "learning_rate": 2.6235611572223933e-05, + "loss": 2.6484, + "step": 278300 + }, + { + "epoch": 7.13, + "learning_rate": 2.622707244594733e-05, + "loss": 2.772, + "step": 278400 + }, + { + "epoch": 7.13, + "learning_rate": 2.6218533319670736e-05, + "loss": 2.6233, + "step": 278500 + }, + { + "epoch": 7.14, + "learning_rate": 2.6209994193394134e-05, + "loss": 2.4345, + "step": 278600 + }, + { + "epoch": 7.14, + "learning_rate": 2.6201455067117536e-05, + "loss": 2.6611, + "step": 278700 + }, + { + "epoch": 7.14, + "learning_rate": 2.6192915940840934e-05, + "loss": 2.6447, + "step": 278800 + }, + { + "epoch": 7.14, + "learning_rate": 2.618437681456434e-05, + "loss": 2.5533, + "step": 278900 + }, + { + "epoch": 7.15, + "learning_rate": 2.6175837688287737e-05, + "loss": 2.5604, + "step": 279000 + }, + { + "epoch": 7.15, + "learning_rate": 2.616729856201114e-05, + "loss": 2.7492, + "step": 279100 + }, + { + "epoch": 7.15, + "learning_rate": 2.6158759435734536e-05, + "loss": 2.4866, + "step": 279200 + }, + { + "epoch": 7.15, + "learning_rate": 2.615022030945794e-05, + "loss": 2.6712, + "step": 279300 + }, + { + "epoch": 7.16, + "learning_rate": 2.614168118318134e-05, + "loss": 2.5109, + "step": 279400 + }, + { + "epoch": 7.16, + "learning_rate": 2.613314205690474e-05, + "loss": 2.5113, + "step": 279500 + }, + { + "epoch": 7.16, + "learning_rate": 2.612460293062814e-05, + "loss": 2.609, + "step": 279600 + }, + { + "epoch": 7.17, + "learning_rate": 2.6116063804351544e-05, + "loss": 2.5117, + "step": 279700 + }, + { + "epoch": 7.17, + "learning_rate": 2.610752467807494e-05, + "loss": 2.7068, + "step": 279800 + }, + { + "epoch": 7.17, + "learning_rate": 2.6098985551798344e-05, + "loss": 2.6591, + "step": 279900 + }, + { + "epoch": 7.17, + "learning_rate": 2.6090446425521742e-05, + "loss": 2.5596, + "step": 280000 + }, + { + "epoch": 7.18, + "learning_rate": 2.6081907299245144e-05, + "loss": 2.7538, + "step": 280100 + }, + { + "epoch": 7.18, + "learning_rate": 2.6073368172968542e-05, + "loss": 2.5003, + "step": 280200 + }, + { + "epoch": 7.18, + "learning_rate": 2.6064829046691947e-05, + "loss": 2.6085, + "step": 280300 + }, + { + "epoch": 7.18, + "learning_rate": 2.6056289920415345e-05, + "loss": 2.5711, + "step": 280400 + }, + { + "epoch": 7.19, + "learning_rate": 2.6047750794138743e-05, + "loss": 2.6625, + "step": 280500 + }, + { + "epoch": 7.19, + "learning_rate": 2.6039211667862145e-05, + "loss": 2.6042, + "step": 280600 + }, + { + "epoch": 7.19, + "learning_rate": 2.6030672541585543e-05, + "loss": 2.5667, + "step": 280700 + }, + { + "epoch": 7.19, + "learning_rate": 2.6022133415308948e-05, + "loss": 2.5048, + "step": 280800 + }, + { + "epoch": 7.2, + "learning_rate": 2.6013594289032346e-05, + "loss": 2.5722, + "step": 280900 + }, + { + "epoch": 7.2, + "learning_rate": 2.6005055162755747e-05, + "loss": 2.5667, + "step": 281000 + }, + { + "epoch": 7.2, + "learning_rate": 2.5996516036479146e-05, + "loss": 2.6249, + "step": 281100 + }, + { + "epoch": 7.2, + "learning_rate": 2.598797691020255e-05, + "loss": 2.4988, + "step": 281200 + }, + { + "epoch": 7.21, + "learning_rate": 2.597943778392595e-05, + "loss": 2.5008, + "step": 281300 + }, + { + "epoch": 7.21, + "learning_rate": 2.597089865764935e-05, + "loss": 2.7434, + "step": 281400 + }, + { + "epoch": 7.21, + "learning_rate": 2.596235953137275e-05, + "loss": 2.6359, + "step": 281500 + }, + { + "epoch": 7.21, + "learning_rate": 2.5953820405096153e-05, + "loss": 2.7356, + "step": 281600 + }, + { + "epoch": 7.22, + "learning_rate": 2.594528127881955e-05, + "loss": 2.3953, + "step": 281700 + }, + { + "epoch": 7.22, + "learning_rate": 2.5936742152542953e-05, + "loss": 2.4301, + "step": 281800 + }, + { + "epoch": 7.22, + "learning_rate": 2.592820302626635e-05, + "loss": 2.5427, + "step": 281900 + }, + { + "epoch": 7.22, + "learning_rate": 2.5919663899989756e-05, + "loss": 2.526, + "step": 282000 + }, + { + "epoch": 7.23, + "learning_rate": 2.591112477371315e-05, + "loss": 2.6178, + "step": 282100 + }, + { + "epoch": 7.23, + "learning_rate": 2.5902585647436556e-05, + "loss": 2.6561, + "step": 282200 + }, + { + "epoch": 7.23, + "learning_rate": 2.5894046521159954e-05, + "loss": 2.4964, + "step": 282300 + }, + { + "epoch": 7.23, + "learning_rate": 2.5885507394883356e-05, + "loss": 2.4569, + "step": 282400 + }, + { + "epoch": 7.24, + "learning_rate": 2.5876968268606754e-05, + "loss": 2.5758, + "step": 282500 + }, + { + "epoch": 7.24, + "learning_rate": 2.586842914233016e-05, + "loss": 2.5038, + "step": 282600 + }, + { + "epoch": 7.24, + "learning_rate": 2.5859890016053557e-05, + "loss": 2.5867, + "step": 282700 + }, + { + "epoch": 7.24, + "learning_rate": 2.585135088977696e-05, + "loss": 2.7712, + "step": 282800 + }, + { + "epoch": 7.25, + "learning_rate": 2.5842811763500356e-05, + "loss": 2.3986, + "step": 282900 + }, + { + "epoch": 7.25, + "learning_rate": 2.583427263722376e-05, + "loss": 2.6106, + "step": 283000 + }, + { + "epoch": 7.25, + "learning_rate": 2.582573351094716e-05, + "loss": 2.6787, + "step": 283100 + }, + { + "epoch": 7.25, + "learning_rate": 2.581719438467056e-05, + "loss": 2.586, + "step": 283200 + }, + { + "epoch": 7.26, + "learning_rate": 2.580865525839396e-05, + "loss": 2.4844, + "step": 283300 + }, + { + "epoch": 7.26, + "learning_rate": 2.5800116132117364e-05, + "loss": 2.7313, + "step": 283400 + }, + { + "epoch": 7.26, + "learning_rate": 2.5791577005840762e-05, + "loss": 2.6168, + "step": 283500 + }, + { + "epoch": 7.27, + "learning_rate": 2.5783037879564164e-05, + "loss": 2.4937, + "step": 283600 + }, + { + "epoch": 7.27, + "learning_rate": 2.5774498753287562e-05, + "loss": 2.7365, + "step": 283700 + }, + { + "epoch": 7.27, + "learning_rate": 2.5765959627010967e-05, + "loss": 2.626, + "step": 283800 + }, + { + "epoch": 7.27, + "learning_rate": 2.5757420500734365e-05, + "loss": 2.7652, + "step": 283900 + }, + { + "epoch": 7.28, + "learning_rate": 2.5748881374457767e-05, + "loss": 2.7086, + "step": 284000 + }, + { + "epoch": 7.28, + "learning_rate": 2.5740342248181165e-05, + "loss": 2.5696, + "step": 284100 + }, + { + "epoch": 7.28, + "learning_rate": 2.573180312190457e-05, + "loss": 2.6369, + "step": 284200 + }, + { + "epoch": 7.28, + "learning_rate": 2.5723263995627968e-05, + "loss": 2.6651, + "step": 284300 + }, + { + "epoch": 7.29, + "learning_rate": 2.571472486935137e-05, + "loss": 2.7067, + "step": 284400 + }, + { + "epoch": 7.29, + "learning_rate": 2.5706185743074768e-05, + "loss": 2.3506, + "step": 284500 + }, + { + "epoch": 7.29, + "learning_rate": 2.5697646616798173e-05, + "loss": 2.549, + "step": 284600 + }, + { + "epoch": 7.29, + "learning_rate": 2.5689107490521567e-05, + "loss": 2.7959, + "step": 284700 + }, + { + "epoch": 7.3, + "learning_rate": 2.5680568364244972e-05, + "loss": 2.7381, + "step": 284800 + }, + { + "epoch": 7.3, + "learning_rate": 2.567202923796837e-05, + "loss": 2.6734, + "step": 284900 + }, + { + "epoch": 7.3, + "learning_rate": 2.5663490111691772e-05, + "loss": 2.5239, + "step": 285000 + }, + { + "epoch": 7.3, + "learning_rate": 2.565495098541517e-05, + "loss": 2.5626, + "step": 285100 + }, + { + "epoch": 7.31, + "learning_rate": 2.5646411859138575e-05, + "loss": 2.4925, + "step": 285200 + }, + { + "epoch": 7.31, + "learning_rate": 2.5637872732861973e-05, + "loss": 2.6307, + "step": 285300 + }, + { + "epoch": 7.31, + "learning_rate": 2.5629333606585375e-05, + "loss": 2.4771, + "step": 285400 + }, + { + "epoch": 7.31, + "learning_rate": 2.5620794480308773e-05, + "loss": 2.5061, + "step": 285500 + }, + { + "epoch": 7.32, + "learning_rate": 2.5612255354032178e-05, + "loss": 2.5368, + "step": 285600 + }, + { + "epoch": 7.32, + "learning_rate": 2.5603716227755576e-05, + "loss": 2.7254, + "step": 285700 + }, + { + "epoch": 7.32, + "learning_rate": 2.5595177101478978e-05, + "loss": 2.6842, + "step": 285800 + }, + { + "epoch": 7.32, + "learning_rate": 2.5586637975202376e-05, + "loss": 2.7254, + "step": 285900 + }, + { + "epoch": 7.33, + "learning_rate": 2.557809884892578e-05, + "loss": 2.6652, + "step": 286000 + }, + { + "epoch": 7.33, + "learning_rate": 2.556955972264918e-05, + "loss": 2.473, + "step": 286100 + }, + { + "epoch": 7.33, + "learning_rate": 2.556102059637258e-05, + "loss": 2.6769, + "step": 286200 + }, + { + "epoch": 7.33, + "learning_rate": 2.555248147009598e-05, + "loss": 2.6187, + "step": 286300 + }, + { + "epoch": 7.34, + "learning_rate": 2.5543942343819384e-05, + "loss": 2.6971, + "step": 286400 + }, + { + "epoch": 7.34, + "learning_rate": 2.5535403217542782e-05, + "loss": 2.5785, + "step": 286500 + }, + { + "epoch": 7.34, + "learning_rate": 2.5526864091266183e-05, + "loss": 2.6179, + "step": 286600 + }, + { + "epoch": 7.34, + "learning_rate": 2.551832496498958e-05, + "loss": 2.4922, + "step": 286700 + }, + { + "epoch": 7.35, + "learning_rate": 2.5509785838712986e-05, + "loss": 2.623, + "step": 286800 + }, + { + "epoch": 7.35, + "learning_rate": 2.5501246712436385e-05, + "loss": 2.6506, + "step": 286900 + }, + { + "epoch": 7.35, + "learning_rate": 2.5492707586159786e-05, + "loss": 2.4052, + "step": 287000 + }, + { + "epoch": 7.35, + "learning_rate": 2.5484168459883184e-05, + "loss": 2.5711, + "step": 287100 + }, + { + "epoch": 7.36, + "learning_rate": 2.547562933360659e-05, + "loss": 2.7055, + "step": 287200 + }, + { + "epoch": 7.36, + "learning_rate": 2.5467090207329984e-05, + "loss": 2.4519, + "step": 287300 + }, + { + "epoch": 7.36, + "learning_rate": 2.545855108105339e-05, + "loss": 2.6229, + "step": 287400 + }, + { + "epoch": 7.36, + "learning_rate": 2.5450011954776787e-05, + "loss": 2.6368, + "step": 287500 + }, + { + "epoch": 7.37, + "learning_rate": 2.544147282850019e-05, + "loss": 2.7205, + "step": 287600 + }, + { + "epoch": 7.37, + "learning_rate": 2.5432933702223587e-05, + "loss": 2.6398, + "step": 287700 + }, + { + "epoch": 7.37, + "learning_rate": 2.542439457594699e-05, + "loss": 2.5763, + "step": 287800 + }, + { + "epoch": 7.38, + "learning_rate": 2.541585544967039e-05, + "loss": 2.5818, + "step": 287900 + }, + { + "epoch": 7.38, + "learning_rate": 2.540731632339379e-05, + "loss": 2.5528, + "step": 288000 + }, + { + "epoch": 7.38, + "learning_rate": 2.539877719711719e-05, + "loss": 2.5105, + "step": 288100 + }, + { + "epoch": 7.38, + "learning_rate": 2.5390238070840594e-05, + "loss": 2.5778, + "step": 288200 + }, + { + "epoch": 7.39, + "learning_rate": 2.5381698944563993e-05, + "loss": 2.6856, + "step": 288300 + }, + { + "epoch": 7.39, + "learning_rate": 2.5373159818287394e-05, + "loss": 2.6557, + "step": 288400 + }, + { + "epoch": 7.39, + "learning_rate": 2.5364620692010792e-05, + "loss": 2.5503, + "step": 288500 + }, + { + "epoch": 7.39, + "learning_rate": 2.5356081565734197e-05, + "loss": 2.5558, + "step": 288600 + }, + { + "epoch": 7.4, + "learning_rate": 2.5347542439457595e-05, + "loss": 2.6848, + "step": 288700 + }, + { + "epoch": 7.4, + "learning_rate": 2.5339003313180997e-05, + "loss": 2.5382, + "step": 288800 + }, + { + "epoch": 7.4, + "learning_rate": 2.5330464186904395e-05, + "loss": 2.6446, + "step": 288900 + }, + { + "epoch": 7.4, + "learning_rate": 2.53219250606278e-05, + "loss": 2.7562, + "step": 289000 + }, + { + "epoch": 7.41, + "learning_rate": 2.5313385934351198e-05, + "loss": 2.4963, + "step": 289100 + }, + { + "epoch": 7.41, + "learning_rate": 2.53048468080746e-05, + "loss": 2.5525, + "step": 289200 + }, + { + "epoch": 7.41, + "learning_rate": 2.5296307681797998e-05, + "loss": 2.6787, + "step": 289300 + }, + { + "epoch": 7.41, + "learning_rate": 2.5287768555521403e-05, + "loss": 2.6217, + "step": 289400 + }, + { + "epoch": 7.42, + "learning_rate": 2.52792294292448e-05, + "loss": 2.3872, + "step": 289500 + }, + { + "epoch": 7.42, + "learning_rate": 2.5270690302968203e-05, + "loss": 2.7578, + "step": 289600 + }, + { + "epoch": 7.42, + "learning_rate": 2.52621511766916e-05, + "loss": 2.5643, + "step": 289700 + }, + { + "epoch": 7.42, + "learning_rate": 2.5253612050415006e-05, + "loss": 2.4704, + "step": 289800 + }, + { + "epoch": 7.43, + "learning_rate": 2.52450729241384e-05, + "loss": 2.5876, + "step": 289900 + }, + { + "epoch": 7.43, + "learning_rate": 2.5236533797861805e-05, + "loss": 2.6602, + "step": 290000 + }, + { + "epoch": 7.43, + "learning_rate": 2.5227994671585204e-05, + "loss": 2.4969, + "step": 290100 + }, + { + "epoch": 7.43, + "learning_rate": 2.5219455545308605e-05, + "loss": 2.6634, + "step": 290200 + }, + { + "epoch": 7.44, + "learning_rate": 2.5210916419032003e-05, + "loss": 2.5549, + "step": 290300 + }, + { + "epoch": 7.44, + "learning_rate": 2.5202377292755408e-05, + "loss": 2.5112, + "step": 290400 + }, + { + "epoch": 7.44, + "learning_rate": 2.5193838166478806e-05, + "loss": 2.4683, + "step": 290500 + }, + { + "epoch": 7.44, + "learning_rate": 2.5185299040202208e-05, + "loss": 2.605, + "step": 290600 + }, + { + "epoch": 7.45, + "learning_rate": 2.5176759913925606e-05, + "loss": 2.5804, + "step": 290700 + }, + { + "epoch": 7.45, + "learning_rate": 2.516822078764901e-05, + "loss": 2.476, + "step": 290800 + }, + { + "epoch": 7.45, + "learning_rate": 2.515968166137241e-05, + "loss": 2.672, + "step": 290900 + }, + { + "epoch": 7.45, + "learning_rate": 2.515114253509581e-05, + "loss": 2.7042, + "step": 291000 + }, + { + "epoch": 7.46, + "learning_rate": 2.514260340881921e-05, + "loss": 2.4737, + "step": 291100 + }, + { + "epoch": 7.46, + "learning_rate": 2.5134064282542614e-05, + "loss": 2.6669, + "step": 291200 + }, + { + "epoch": 7.46, + "learning_rate": 2.5125525156266012e-05, + "loss": 2.5015, + "step": 291300 + }, + { + "epoch": 7.46, + "learning_rate": 2.5116986029989414e-05, + "loss": 2.5606, + "step": 291400 + }, + { + "epoch": 7.47, + "learning_rate": 2.5108446903712812e-05, + "loss": 2.5226, + "step": 291500 + }, + { + "epoch": 7.47, + "learning_rate": 2.5099907777436217e-05, + "loss": 2.6841, + "step": 291600 + }, + { + "epoch": 7.47, + "learning_rate": 2.5091368651159615e-05, + "loss": 2.5918, + "step": 291700 + }, + { + "epoch": 7.48, + "learning_rate": 2.5082829524883016e-05, + "loss": 2.6181, + "step": 291800 + }, + { + "epoch": 7.48, + "learning_rate": 2.5074290398606415e-05, + "loss": 2.5589, + "step": 291900 + }, + { + "epoch": 7.48, + "learning_rate": 2.506575127232982e-05, + "loss": 2.5423, + "step": 292000 + }, + { + "epoch": 7.48, + "learning_rate": 2.5057212146053218e-05, + "loss": 2.7337, + "step": 292100 + }, + { + "epoch": 7.49, + "learning_rate": 2.504867301977662e-05, + "loss": 2.6391, + "step": 292200 + }, + { + "epoch": 7.49, + "learning_rate": 2.5040133893500017e-05, + "loss": 2.3304, + "step": 292300 + }, + { + "epoch": 7.49, + "learning_rate": 2.5031594767223422e-05, + "loss": 2.4947, + "step": 292400 + }, + { + "epoch": 7.49, + "learning_rate": 2.5023055640946817e-05, + "loss": 2.49, + "step": 292500 + }, + { + "epoch": 7.5, + "learning_rate": 2.5014516514670222e-05, + "loss": 2.5896, + "step": 292600 + }, + { + "epoch": 7.5, + "learning_rate": 2.500597738839362e-05, + "loss": 2.4626, + "step": 292700 + }, + { + "epoch": 7.5, + "learning_rate": 2.499743826211702e-05, + "loss": 2.6996, + "step": 292800 + }, + { + "epoch": 7.5, + "learning_rate": 2.498889913584042e-05, + "loss": 2.556, + "step": 292900 + }, + { + "epoch": 7.51, + "learning_rate": 2.498036000956382e-05, + "loss": 2.7386, + "step": 293000 + }, + { + "epoch": 7.51, + "learning_rate": 2.4971820883287223e-05, + "loss": 2.5582, + "step": 293100 + }, + { + "epoch": 7.51, + "learning_rate": 2.4963281757010624e-05, + "loss": 2.5779, + "step": 293200 + }, + { + "epoch": 7.51, + "learning_rate": 2.4954742630734023e-05, + "loss": 2.5479, + "step": 293300 + }, + { + "epoch": 7.52, + "learning_rate": 2.4946203504457424e-05, + "loss": 2.6817, + "step": 293400 + }, + { + "epoch": 7.52, + "learning_rate": 2.4937664378180826e-05, + "loss": 2.5313, + "step": 293500 + }, + { + "epoch": 7.52, + "learning_rate": 2.4929125251904227e-05, + "loss": 2.7259, + "step": 293600 + }, + { + "epoch": 7.52, + "learning_rate": 2.4920586125627625e-05, + "loss": 2.674, + "step": 293700 + }, + { + "epoch": 7.53, + "learning_rate": 2.4912046999351027e-05, + "loss": 2.3784, + "step": 293800 + }, + { + "epoch": 7.53, + "learning_rate": 2.490350787307443e-05, + "loss": 2.4276, + "step": 293900 + }, + { + "epoch": 7.53, + "learning_rate": 2.4894968746797827e-05, + "loss": 2.6637, + "step": 294000 + }, + { + "epoch": 7.53, + "learning_rate": 2.4886429620521228e-05, + "loss": 2.4605, + "step": 294100 + }, + { + "epoch": 7.54, + "learning_rate": 2.487789049424463e-05, + "loss": 2.5052, + "step": 294200 + }, + { + "epoch": 7.54, + "learning_rate": 2.486935136796803e-05, + "loss": 2.6614, + "step": 294300 + }, + { + "epoch": 7.54, + "learning_rate": 2.486081224169143e-05, + "loss": 2.5257, + "step": 294400 + }, + { + "epoch": 7.54, + "learning_rate": 2.485227311541483e-05, + "loss": 2.5516, + "step": 294500 + }, + { + "epoch": 7.55, + "learning_rate": 2.4843733989138233e-05, + "loss": 2.7613, + "step": 294600 + }, + { + "epoch": 7.55, + "learning_rate": 2.4835194862861634e-05, + "loss": 2.5585, + "step": 294700 + }, + { + "epoch": 7.55, + "learning_rate": 2.4826655736585032e-05, + "loss": 2.5098, + "step": 294800 + }, + { + "epoch": 7.55, + "learning_rate": 2.4818116610308434e-05, + "loss": 2.607, + "step": 294900 + }, + { + "epoch": 7.56, + "learning_rate": 2.4809577484031835e-05, + "loss": 2.634, + "step": 295000 + }, + { + "epoch": 7.56, + "learning_rate": 2.4801038357755234e-05, + "loss": 2.443, + "step": 295100 + }, + { + "epoch": 7.56, + "learning_rate": 2.4792499231478635e-05, + "loss": 2.5236, + "step": 295200 + }, + { + "epoch": 7.56, + "learning_rate": 2.4783960105202037e-05, + "loss": 2.6182, + "step": 295300 + }, + { + "epoch": 7.57, + "learning_rate": 2.4775420978925438e-05, + "loss": 2.5835, + "step": 295400 + }, + { + "epoch": 7.57, + "learning_rate": 2.4766881852648836e-05, + "loss": 2.8453, + "step": 295500 + }, + { + "epoch": 7.57, + "learning_rate": 2.4758342726372238e-05, + "loss": 2.6775, + "step": 295600 + }, + { + "epoch": 7.58, + "learning_rate": 2.474980360009564e-05, + "loss": 2.7386, + "step": 295700 + }, + { + "epoch": 7.58, + "learning_rate": 2.474126447381904e-05, + "loss": 2.56, + "step": 295800 + }, + { + "epoch": 7.58, + "learning_rate": 2.473272534754244e-05, + "loss": 2.703, + "step": 295900 + }, + { + "epoch": 7.58, + "learning_rate": 2.472418622126584e-05, + "loss": 2.6336, + "step": 296000 + }, + { + "epoch": 7.59, + "learning_rate": 2.4715647094989242e-05, + "loss": 2.8031, + "step": 296100 + }, + { + "epoch": 7.59, + "learning_rate": 2.4707107968712644e-05, + "loss": 2.5844, + "step": 296200 + }, + { + "epoch": 7.59, + "learning_rate": 2.4698568842436042e-05, + "loss": 2.4344, + "step": 296300 + }, + { + "epoch": 7.59, + "learning_rate": 2.4690029716159444e-05, + "loss": 2.5045, + "step": 296400 + }, + { + "epoch": 7.6, + "learning_rate": 2.4681490589882845e-05, + "loss": 2.6255, + "step": 296500 + }, + { + "epoch": 7.6, + "learning_rate": 2.4672951463606243e-05, + "loss": 2.4738, + "step": 296600 + }, + { + "epoch": 7.6, + "learning_rate": 2.4664412337329645e-05, + "loss": 2.8838, + "step": 296700 + }, + { + "epoch": 7.6, + "learning_rate": 2.4655873211053046e-05, + "loss": 2.5313, + "step": 296800 + }, + { + "epoch": 7.61, + "learning_rate": 2.4647334084776448e-05, + "loss": 2.4704, + "step": 296900 + }, + { + "epoch": 7.61, + "learning_rate": 2.4638794958499846e-05, + "loss": 2.7296, + "step": 297000 + }, + { + "epoch": 7.61, + "learning_rate": 2.4630255832223248e-05, + "loss": 2.4954, + "step": 297100 + }, + { + "epoch": 7.61, + "learning_rate": 2.462171670594665e-05, + "loss": 2.4362, + "step": 297200 + }, + { + "epoch": 7.62, + "learning_rate": 2.461317757967005e-05, + "loss": 2.6174, + "step": 297300 + }, + { + "epoch": 7.62, + "learning_rate": 2.460463845339345e-05, + "loss": 2.7054, + "step": 297400 + }, + { + "epoch": 7.62, + "learning_rate": 2.459609932711685e-05, + "loss": 2.6661, + "step": 297500 + }, + { + "epoch": 7.62, + "learning_rate": 2.4587560200840252e-05, + "loss": 2.6271, + "step": 297600 + }, + { + "epoch": 7.63, + "learning_rate": 2.457902107456365e-05, + "loss": 2.6175, + "step": 297700 + }, + { + "epoch": 7.63, + "learning_rate": 2.457048194828705e-05, + "loss": 2.6022, + "step": 297800 + }, + { + "epoch": 7.63, + "learning_rate": 2.4561942822010453e-05, + "loss": 2.4242, + "step": 297900 + }, + { + "epoch": 7.63, + "learning_rate": 2.4553403695733855e-05, + "loss": 2.5952, + "step": 298000 + }, + { + "epoch": 7.64, + "learning_rate": 2.4544864569457253e-05, + "loss": 2.5788, + "step": 298100 + }, + { + "epoch": 7.64, + "learning_rate": 2.4536325443180654e-05, + "loss": 2.6518, + "step": 298200 + }, + { + "epoch": 7.64, + "learning_rate": 2.4527786316904056e-05, + "loss": 2.5203, + "step": 298300 + }, + { + "epoch": 7.64, + "learning_rate": 2.4519247190627458e-05, + "loss": 2.605, + "step": 298400 + }, + { + "epoch": 7.65, + "learning_rate": 2.4510708064350856e-05, + "loss": 2.6588, + "step": 298500 + }, + { + "epoch": 7.65, + "learning_rate": 2.4502168938074257e-05, + "loss": 2.5363, + "step": 298600 + }, + { + "epoch": 7.65, + "learning_rate": 2.449362981179766e-05, + "loss": 2.4543, + "step": 298700 + }, + { + "epoch": 7.65, + "learning_rate": 2.448509068552106e-05, + "loss": 2.7616, + "step": 298800 + }, + { + "epoch": 7.66, + "learning_rate": 2.447655155924446e-05, + "loss": 2.6632, + "step": 298900 + }, + { + "epoch": 7.66, + "learning_rate": 2.446801243296786e-05, + "loss": 2.5355, + "step": 299000 + }, + { + "epoch": 7.66, + "learning_rate": 2.445947330669126e-05, + "loss": 2.8136, + "step": 299100 + }, + { + "epoch": 7.66, + "learning_rate": 2.445093418041466e-05, + "loss": 2.7673, + "step": 299200 + }, + { + "epoch": 7.67, + "learning_rate": 2.444239505413806e-05, + "loss": 2.6112, + "step": 299300 + }, + { + "epoch": 7.67, + "learning_rate": 2.4433855927861463e-05, + "loss": 2.616, + "step": 299400 + }, + { + "epoch": 7.67, + "learning_rate": 2.4425316801584864e-05, + "loss": 2.6405, + "step": 299500 + }, + { + "epoch": 7.67, + "learning_rate": 2.4416777675308263e-05, + "loss": 2.6973, + "step": 299600 + }, + { + "epoch": 7.68, + "learning_rate": 2.4408238549031664e-05, + "loss": 2.7004, + "step": 299700 + }, + { + "epoch": 7.68, + "learning_rate": 2.4399699422755066e-05, + "loss": 2.5356, + "step": 299800 + }, + { + "epoch": 7.68, + "learning_rate": 2.4391160296478467e-05, + "loss": 2.6527, + "step": 299900 + }, + { + "epoch": 7.69, + "learning_rate": 2.4382621170201865e-05, + "loss": 2.4687, + "step": 300000 + }, + { + "epoch": 7.69, + "learning_rate": 2.4374082043925267e-05, + "loss": 2.3935, + "step": 300100 + }, + { + "epoch": 7.69, + "learning_rate": 2.436554291764867e-05, + "loss": 2.6342, + "step": 300200 + }, + { + "epoch": 7.69, + "learning_rate": 2.435700379137207e-05, + "loss": 2.682, + "step": 300300 + }, + { + "epoch": 7.7, + "learning_rate": 2.4348464665095468e-05, + "loss": 2.4398, + "step": 300400 + }, + { + "epoch": 7.7, + "learning_rate": 2.433992553881887e-05, + "loss": 2.7451, + "step": 300500 + }, + { + "epoch": 7.7, + "learning_rate": 2.433138641254227e-05, + "loss": 2.4977, + "step": 300600 + }, + { + "epoch": 7.7, + "learning_rate": 2.432284728626567e-05, + "loss": 2.4256, + "step": 300700 + }, + { + "epoch": 7.71, + "learning_rate": 2.431430815998907e-05, + "loss": 2.593, + "step": 300800 + }, + { + "epoch": 7.71, + "learning_rate": 2.4305769033712473e-05, + "loss": 2.3915, + "step": 300900 + }, + { + "epoch": 7.71, + "learning_rate": 2.4297229907435874e-05, + "loss": 2.5495, + "step": 301000 + }, + { + "epoch": 7.71, + "learning_rate": 2.4288690781159272e-05, + "loss": 2.8005, + "step": 301100 + }, + { + "epoch": 7.72, + "learning_rate": 2.4280151654882674e-05, + "loss": 2.5199, + "step": 301200 + }, + { + "epoch": 7.72, + "learning_rate": 2.4271612528606075e-05, + "loss": 2.6356, + "step": 301300 + }, + { + "epoch": 7.72, + "learning_rate": 2.4263073402329477e-05, + "loss": 2.6186, + "step": 301400 + }, + { + "epoch": 7.72, + "learning_rate": 2.4254534276052875e-05, + "loss": 2.3638, + "step": 301500 + }, + { + "epoch": 7.73, + "learning_rate": 2.4245995149776277e-05, + "loss": 2.4584, + "step": 301600 + }, + { + "epoch": 7.73, + "learning_rate": 2.4237456023499678e-05, + "loss": 2.4968, + "step": 301700 + }, + { + "epoch": 7.73, + "learning_rate": 2.4228916897223076e-05, + "loss": 2.4364, + "step": 301800 + }, + { + "epoch": 7.73, + "learning_rate": 2.4220377770946478e-05, + "loss": 2.4099, + "step": 301900 + }, + { + "epoch": 7.74, + "learning_rate": 2.421183864466988e-05, + "loss": 2.5179, + "step": 302000 + }, + { + "epoch": 7.74, + "learning_rate": 2.420329951839328e-05, + "loss": 2.4713, + "step": 302100 + }, + { + "epoch": 7.74, + "learning_rate": 2.419476039211668e-05, + "loss": 2.6405, + "step": 302200 + }, + { + "epoch": 7.74, + "learning_rate": 2.418622126584008e-05, + "loss": 2.4238, + "step": 302300 + }, + { + "epoch": 7.75, + "learning_rate": 2.4177682139563482e-05, + "loss": 2.4564, + "step": 302400 + }, + { + "epoch": 7.75, + "learning_rate": 2.4169143013286884e-05, + "loss": 2.554, + "step": 302500 + }, + { + "epoch": 7.75, + "learning_rate": 2.4160603887010282e-05, + "loss": 2.438, + "step": 302600 + }, + { + "epoch": 7.75, + "learning_rate": 2.4152064760733683e-05, + "loss": 2.4269, + "step": 302700 + }, + { + "epoch": 7.76, + "learning_rate": 2.4143525634457085e-05, + "loss": 2.4377, + "step": 302800 + }, + { + "epoch": 7.76, + "learning_rate": 2.4134986508180487e-05, + "loss": 2.5265, + "step": 302900 + }, + { + "epoch": 7.76, + "learning_rate": 2.4126447381903885e-05, + "loss": 2.639, + "step": 303000 + }, + { + "epoch": 7.76, + "learning_rate": 2.4117908255627286e-05, + "loss": 2.4689, + "step": 303100 + }, + { + "epoch": 7.77, + "learning_rate": 2.4109369129350688e-05, + "loss": 2.601, + "step": 303200 + }, + { + "epoch": 7.77, + "learning_rate": 2.4100830003074086e-05, + "loss": 2.5779, + "step": 303300 + }, + { + "epoch": 7.77, + "learning_rate": 2.4092290876797488e-05, + "loss": 2.6439, + "step": 303400 + }, + { + "epoch": 7.77, + "learning_rate": 2.408375175052089e-05, + "loss": 2.6182, + "step": 303500 + }, + { + "epoch": 7.78, + "learning_rate": 2.407521262424429e-05, + "loss": 2.6911, + "step": 303600 + }, + { + "epoch": 7.78, + "learning_rate": 2.406667349796769e-05, + "loss": 2.453, + "step": 303700 + }, + { + "epoch": 7.78, + "learning_rate": 2.405813437169109e-05, + "loss": 2.5643, + "step": 303800 + }, + { + "epoch": 7.79, + "learning_rate": 2.4049595245414492e-05, + "loss": 2.6153, + "step": 303900 + }, + { + "epoch": 7.79, + "learning_rate": 2.4041056119137893e-05, + "loss": 2.5002, + "step": 304000 + }, + { + "epoch": 7.79, + "learning_rate": 2.403251699286129e-05, + "loss": 2.5769, + "step": 304100 + }, + { + "epoch": 7.79, + "learning_rate": 2.4023977866584693e-05, + "loss": 2.4474, + "step": 304200 + }, + { + "epoch": 7.8, + "learning_rate": 2.4015438740308095e-05, + "loss": 2.5044, + "step": 304300 + }, + { + "epoch": 7.8, + "learning_rate": 2.4006899614031493e-05, + "loss": 2.5741, + "step": 304400 + }, + { + "epoch": 7.8, + "learning_rate": 2.3998360487754894e-05, + "loss": 2.612, + "step": 304500 + }, + { + "epoch": 7.8, + "learning_rate": 2.3989821361478296e-05, + "loss": 2.3832, + "step": 304600 + }, + { + "epoch": 7.81, + "learning_rate": 2.3981282235201697e-05, + "loss": 2.5406, + "step": 304700 + }, + { + "epoch": 7.81, + "learning_rate": 2.3972743108925096e-05, + "loss": 2.5468, + "step": 304800 + }, + { + "epoch": 7.81, + "learning_rate": 2.3964203982648497e-05, + "loss": 2.4277, + "step": 304900 + }, + { + "epoch": 7.81, + "learning_rate": 2.39556648563719e-05, + "loss": 2.4712, + "step": 305000 + }, + { + "epoch": 7.82, + "learning_rate": 2.39471257300953e-05, + "loss": 2.5574, + "step": 305100 + }, + { + "epoch": 7.82, + "learning_rate": 2.39385866038187e-05, + "loss": 2.6474, + "step": 305200 + }, + { + "epoch": 7.82, + "learning_rate": 2.39300474775421e-05, + "loss": 2.8032, + "step": 305300 + }, + { + "epoch": 7.82, + "learning_rate": 2.39215083512655e-05, + "loss": 2.5209, + "step": 305400 + }, + { + "epoch": 7.83, + "learning_rate": 2.3912969224988903e-05, + "loss": 2.5471, + "step": 305500 + }, + { + "epoch": 7.83, + "learning_rate": 2.39044300987123e-05, + "loss": 2.5731, + "step": 305600 + }, + { + "epoch": 7.83, + "learning_rate": 2.3895890972435703e-05, + "loss": 2.5844, + "step": 305700 + }, + { + "epoch": 7.83, + "learning_rate": 2.3887351846159104e-05, + "loss": 2.4313, + "step": 305800 + }, + { + "epoch": 7.84, + "learning_rate": 2.3878812719882503e-05, + "loss": 2.5094, + "step": 305900 + }, + { + "epoch": 7.84, + "learning_rate": 2.3870273593605904e-05, + "loss": 2.539, + "step": 306000 + }, + { + "epoch": 7.84, + "learning_rate": 2.3861734467329306e-05, + "loss": 2.5007, + "step": 306100 + }, + { + "epoch": 7.84, + "learning_rate": 2.3853195341052707e-05, + "loss": 2.4104, + "step": 306200 + }, + { + "epoch": 7.85, + "learning_rate": 2.3844656214776105e-05, + "loss": 2.6048, + "step": 306300 + }, + { + "epoch": 7.85, + "learning_rate": 2.3836117088499507e-05, + "loss": 2.7752, + "step": 306400 + }, + { + "epoch": 7.85, + "learning_rate": 2.382757796222291e-05, + "loss": 2.6126, + "step": 306500 + }, + { + "epoch": 7.85, + "learning_rate": 2.381903883594631e-05, + "loss": 2.4156, + "step": 306600 + }, + { + "epoch": 7.86, + "learning_rate": 2.3810499709669708e-05, + "loss": 2.5983, + "step": 306700 + }, + { + "epoch": 7.86, + "learning_rate": 2.3801960583393106e-05, + "loss": 2.4602, + "step": 306800 + }, + { + "epoch": 7.86, + "learning_rate": 2.3793421457116508e-05, + "loss": 2.6034, + "step": 306900 + }, + { + "epoch": 7.86, + "learning_rate": 2.378488233083991e-05, + "loss": 2.5708, + "step": 307000 + }, + { + "epoch": 7.87, + "learning_rate": 2.3776343204563308e-05, + "loss": 2.4888, + "step": 307100 + }, + { + "epoch": 7.87, + "learning_rate": 2.376780407828671e-05, + "loss": 2.547, + "step": 307200 + }, + { + "epoch": 7.87, + "learning_rate": 2.375926495201011e-05, + "loss": 2.5943, + "step": 307300 + }, + { + "epoch": 7.87, + "learning_rate": 2.3750725825733512e-05, + "loss": 2.5553, + "step": 307400 + }, + { + "epoch": 7.88, + "learning_rate": 2.374218669945691e-05, + "loss": 2.5765, + "step": 307500 + }, + { + "epoch": 7.88, + "learning_rate": 2.3733647573180312e-05, + "loss": 2.5925, + "step": 307600 + }, + { + "epoch": 7.88, + "learning_rate": 2.3725108446903713e-05, + "loss": 2.4431, + "step": 307700 + }, + { + "epoch": 7.89, + "learning_rate": 2.3716569320627115e-05, + "loss": 2.6363, + "step": 307800 + }, + { + "epoch": 7.89, + "learning_rate": 2.3708030194350513e-05, + "loss": 2.6017, + "step": 307900 + }, + { + "epoch": 7.89, + "learning_rate": 2.3699491068073915e-05, + "loss": 2.4631, + "step": 308000 + }, + { + "epoch": 7.89, + "learning_rate": 2.3690951941797316e-05, + "loss": 2.4872, + "step": 308100 + }, + { + "epoch": 7.9, + "learning_rate": 2.3682412815520714e-05, + "loss": 2.5542, + "step": 308200 + }, + { + "epoch": 7.9, + "learning_rate": 2.3673873689244116e-05, + "loss": 2.5386, + "step": 308300 + }, + { + "epoch": 7.9, + "learning_rate": 2.3665334562967517e-05, + "loss": 2.363, + "step": 308400 + }, + { + "epoch": 7.9, + "learning_rate": 2.365679543669092e-05, + "loss": 2.5627, + "step": 308500 + }, + { + "epoch": 7.91, + "learning_rate": 2.3648256310414317e-05, + "loss": 2.5397, + "step": 308600 + }, + { + "epoch": 7.91, + "learning_rate": 2.363971718413772e-05, + "loss": 2.571, + "step": 308700 + }, + { + "epoch": 7.91, + "learning_rate": 2.363117805786112e-05, + "loss": 2.5536, + "step": 308800 + }, + { + "epoch": 7.91, + "learning_rate": 2.3622638931584522e-05, + "loss": 2.4963, + "step": 308900 + }, + { + "epoch": 7.92, + "learning_rate": 2.361409980530792e-05, + "loss": 2.538, + "step": 309000 + }, + { + "epoch": 7.92, + "learning_rate": 2.360556067903132e-05, + "loss": 2.5102, + "step": 309100 + }, + { + "epoch": 7.92, + "learning_rate": 2.3597021552754723e-05, + "loss": 2.3795, + "step": 309200 + }, + { + "epoch": 7.92, + "learning_rate": 2.358848242647812e-05, + "loss": 2.6052, + "step": 309300 + }, + { + "epoch": 7.93, + "learning_rate": 2.3579943300201523e-05, + "loss": 2.6662, + "step": 309400 + }, + { + "epoch": 7.93, + "learning_rate": 2.3571404173924924e-05, + "loss": 2.6022, + "step": 309500 + }, + { + "epoch": 7.93, + "learning_rate": 2.3562865047648326e-05, + "loss": 2.5958, + "step": 309600 + }, + { + "epoch": 7.93, + "learning_rate": 2.3554325921371724e-05, + "loss": 2.6017, + "step": 309700 + }, + { + "epoch": 7.94, + "learning_rate": 2.3545786795095126e-05, + "loss": 2.5168, + "step": 309800 + }, + { + "epoch": 7.94, + "learning_rate": 2.3537247668818527e-05, + "loss": 2.3941, + "step": 309900 + }, + { + "epoch": 7.94, + "learning_rate": 2.352870854254193e-05, + "loss": 2.5736, + "step": 310000 + }, + { + "epoch": 7.94, + "learning_rate": 2.3520169416265327e-05, + "loss": 2.6677, + "step": 310100 + }, + { + "epoch": 7.95, + "learning_rate": 2.351163028998873e-05, + "loss": 2.5065, + "step": 310200 + }, + { + "epoch": 7.95, + "learning_rate": 2.350309116371213e-05, + "loss": 2.6352, + "step": 310300 + }, + { + "epoch": 7.95, + "learning_rate": 2.349455203743553e-05, + "loss": 2.5603, + "step": 310400 + }, + { + "epoch": 7.95, + "learning_rate": 2.348601291115893e-05, + "loss": 2.6301, + "step": 310500 + }, + { + "epoch": 7.96, + "learning_rate": 2.347747378488233e-05, + "loss": 2.6784, + "step": 310600 + }, + { + "epoch": 7.96, + "learning_rate": 2.3468934658605733e-05, + "loss": 2.4355, + "step": 310700 + }, + { + "epoch": 7.96, + "learning_rate": 2.346039553232913e-05, + "loss": 2.6839, + "step": 310800 + }, + { + "epoch": 7.96, + "learning_rate": 2.3451856406052532e-05, + "loss": 2.665, + "step": 310900 + }, + { + "epoch": 7.97, + "learning_rate": 2.3443317279775934e-05, + "loss": 2.5209, + "step": 311000 + }, + { + "epoch": 7.97, + "learning_rate": 2.3434778153499336e-05, + "loss": 2.4111, + "step": 311100 + }, + { + "epoch": 7.97, + "learning_rate": 2.3426239027222734e-05, + "loss": 2.6543, + "step": 311200 + }, + { + "epoch": 7.97, + "learning_rate": 2.3417699900946135e-05, + "loss": 2.5009, + "step": 311300 + }, + { + "epoch": 7.98, + "learning_rate": 2.3409160774669537e-05, + "loss": 2.4491, + "step": 311400 + }, + { + "epoch": 7.98, + "learning_rate": 2.340062164839294e-05, + "loss": 2.6839, + "step": 311500 + }, + { + "epoch": 7.98, + "learning_rate": 2.3392082522116337e-05, + "loss": 2.4937, + "step": 311600 + }, + { + "epoch": 7.98, + "learning_rate": 2.3383543395839738e-05, + "loss": 2.6359, + "step": 311700 + }, + { + "epoch": 7.99, + "learning_rate": 2.337500426956314e-05, + "loss": 2.591, + "step": 311800 + }, + { + "epoch": 7.99, + "learning_rate": 2.3366465143286538e-05, + "loss": 2.5896, + "step": 311900 + }, + { + "epoch": 7.99, + "learning_rate": 2.335792601700994e-05, + "loss": 2.4467, + "step": 312000 + }, + { + "epoch": 8.0, + "learning_rate": 2.334938689073334e-05, + "loss": 2.3812, + "step": 312100 + }, + { + "epoch": 8.0, + "learning_rate": 2.3340847764456742e-05, + "loss": 2.3776, + "step": 312200 + }, + { + "epoch": 8.0, + "learning_rate": 2.333230863818014e-05, + "loss": 2.6316, + "step": 312300 + }, + { + "epoch": 8.0, + "learning_rate": 2.3323769511903542e-05, + "loss": 2.5576, + "step": 312400 + }, + { + "epoch": 8.01, + "learning_rate": 2.3315230385626944e-05, + "loss": 2.5214, + "step": 312500 + }, + { + "epoch": 8.01, + "learning_rate": 2.3306691259350345e-05, + "loss": 2.5585, + "step": 312600 + }, + { + "epoch": 8.01, + "learning_rate": 2.3298152133073743e-05, + "loss": 2.5395, + "step": 312700 + }, + { + "epoch": 8.01, + "learning_rate": 2.3289613006797145e-05, + "loss": 2.4142, + "step": 312800 + }, + { + "epoch": 8.02, + "learning_rate": 2.3281073880520547e-05, + "loss": 2.576, + "step": 312900 + }, + { + "epoch": 8.02, + "learning_rate": 2.3272534754243948e-05, + "loss": 2.5555, + "step": 313000 + }, + { + "epoch": 8.02, + "learning_rate": 2.3263995627967346e-05, + "loss": 2.5797, + "step": 313100 + }, + { + "epoch": 8.02, + "learning_rate": 2.3255456501690748e-05, + "loss": 2.5472, + "step": 313200 + }, + { + "epoch": 8.03, + "learning_rate": 2.324691737541415e-05, + "loss": 2.4686, + "step": 313300 + }, + { + "epoch": 8.03, + "learning_rate": 2.3238378249137547e-05, + "loss": 2.5522, + "step": 313400 + }, + { + "epoch": 8.03, + "learning_rate": 2.322983912286095e-05, + "loss": 2.4373, + "step": 313500 + }, + { + "epoch": 8.03, + "learning_rate": 2.322129999658435e-05, + "loss": 2.487, + "step": 313600 + }, + { + "epoch": 8.04, + "learning_rate": 2.3212760870307752e-05, + "loss": 2.3727, + "step": 313700 + }, + { + "epoch": 8.04, + "learning_rate": 2.320422174403115e-05, + "loss": 2.6973, + "step": 313800 + }, + { + "epoch": 8.04, + "learning_rate": 2.3195682617754552e-05, + "loss": 2.3589, + "step": 313900 + }, + { + "epoch": 8.04, + "learning_rate": 2.3187143491477953e-05, + "loss": 2.6867, + "step": 314000 + }, + { + "epoch": 8.05, + "learning_rate": 2.3178604365201355e-05, + "loss": 2.6313, + "step": 314100 + }, + { + "epoch": 8.05, + "learning_rate": 2.3170065238924753e-05, + "loss": 2.4842, + "step": 314200 + }, + { + "epoch": 8.05, + "learning_rate": 2.3161526112648155e-05, + "loss": 2.5929, + "step": 314300 + }, + { + "epoch": 8.05, + "learning_rate": 2.3152986986371556e-05, + "loss": 2.4353, + "step": 314400 + }, + { + "epoch": 8.06, + "learning_rate": 2.3144447860094954e-05, + "loss": 2.5274, + "step": 314500 + }, + { + "epoch": 8.06, + "learning_rate": 2.3135908733818356e-05, + "loss": 2.3902, + "step": 314600 + }, + { + "epoch": 8.06, + "learning_rate": 2.3127369607541757e-05, + "loss": 2.5433, + "step": 314700 + }, + { + "epoch": 8.06, + "learning_rate": 2.311883048126516e-05, + "loss": 2.7213, + "step": 314800 + }, + { + "epoch": 8.07, + "learning_rate": 2.3110291354988557e-05, + "loss": 2.7245, + "step": 314900 + }, + { + "epoch": 8.07, + "learning_rate": 2.310175222871196e-05, + "loss": 2.6024, + "step": 315000 + }, + { + "epoch": 8.07, + "learning_rate": 2.309321310243536e-05, + "loss": 2.5649, + "step": 315100 + }, + { + "epoch": 8.07, + "learning_rate": 2.3084673976158762e-05, + "loss": 2.5184, + "step": 315200 + }, + { + "epoch": 8.08, + "learning_rate": 2.307613484988216e-05, + "loss": 2.3627, + "step": 315300 + }, + { + "epoch": 8.08, + "learning_rate": 2.306759572360556e-05, + "loss": 2.4564, + "step": 315400 + }, + { + "epoch": 8.08, + "learning_rate": 2.3059056597328963e-05, + "loss": 2.5968, + "step": 315500 + }, + { + "epoch": 8.08, + "learning_rate": 2.3050517471052365e-05, + "loss": 2.5117, + "step": 315600 + }, + { + "epoch": 8.09, + "learning_rate": 2.3041978344775763e-05, + "loss": 2.5541, + "step": 315700 + }, + { + "epoch": 8.09, + "learning_rate": 2.3033439218499164e-05, + "loss": 2.6094, + "step": 315800 + }, + { + "epoch": 8.09, + "learning_rate": 2.3024900092222566e-05, + "loss": 2.5938, + "step": 315900 + }, + { + "epoch": 8.1, + "learning_rate": 2.3016360965945964e-05, + "loss": 2.5505, + "step": 316000 + }, + { + "epoch": 8.1, + "learning_rate": 2.3007821839669366e-05, + "loss": 2.5796, + "step": 316100 + }, + { + "epoch": 8.1, + "learning_rate": 2.2999282713392767e-05, + "loss": 2.4089, + "step": 316200 + }, + { + "epoch": 8.1, + "learning_rate": 2.299074358711617e-05, + "loss": 2.5582, + "step": 316300 + }, + { + "epoch": 8.11, + "learning_rate": 2.2982204460839567e-05, + "loss": 2.5246, + "step": 316400 + }, + { + "epoch": 8.11, + "learning_rate": 2.297366533456297e-05, + "loss": 2.5751, + "step": 316500 + }, + { + "epoch": 8.11, + "learning_rate": 2.296512620828637e-05, + "loss": 2.6564, + "step": 316600 + }, + { + "epoch": 8.11, + "learning_rate": 2.295658708200977e-05, + "loss": 2.552, + "step": 316700 + }, + { + "epoch": 8.12, + "learning_rate": 2.294804795573317e-05, + "loss": 2.5467, + "step": 316800 + }, + { + "epoch": 8.12, + "learning_rate": 2.293950882945657e-05, + "loss": 2.4852, + "step": 316900 + }, + { + "epoch": 8.12, + "learning_rate": 2.2930969703179973e-05, + "loss": 2.4988, + "step": 317000 + }, + { + "epoch": 8.12, + "learning_rate": 2.2922430576903374e-05, + "loss": 2.505, + "step": 317100 + }, + { + "epoch": 8.13, + "learning_rate": 2.2913891450626772e-05, + "loss": 2.4597, + "step": 317200 + }, + { + "epoch": 8.13, + "learning_rate": 2.2905352324350174e-05, + "loss": 2.4072, + "step": 317300 + }, + { + "epoch": 8.13, + "learning_rate": 2.2896813198073576e-05, + "loss": 2.3633, + "step": 317400 + }, + { + "epoch": 8.13, + "learning_rate": 2.2888274071796974e-05, + "loss": 2.6036, + "step": 317500 + }, + { + "epoch": 8.14, + "learning_rate": 2.2879734945520375e-05, + "loss": 2.5262, + "step": 317600 + }, + { + "epoch": 8.14, + "learning_rate": 2.2871195819243777e-05, + "loss": 2.722, + "step": 317700 + }, + { + "epoch": 8.14, + "learning_rate": 2.286265669296718e-05, + "loss": 2.6565, + "step": 317800 + }, + { + "epoch": 8.14, + "learning_rate": 2.2854117566690576e-05, + "loss": 2.4619, + "step": 317900 + }, + { + "epoch": 8.15, + "learning_rate": 2.2845578440413978e-05, + "loss": 2.5398, + "step": 318000 + }, + { + "epoch": 8.15, + "learning_rate": 2.283703931413738e-05, + "loss": 2.6577, + "step": 318100 + }, + { + "epoch": 8.15, + "learning_rate": 2.282850018786078e-05, + "loss": 2.4912, + "step": 318200 + }, + { + "epoch": 8.15, + "learning_rate": 2.281996106158418e-05, + "loss": 2.5452, + "step": 318300 + }, + { + "epoch": 8.16, + "learning_rate": 2.281142193530758e-05, + "loss": 2.6178, + "step": 318400 + }, + { + "epoch": 8.16, + "learning_rate": 2.2802882809030982e-05, + "loss": 2.6458, + "step": 318500 + }, + { + "epoch": 8.16, + "learning_rate": 2.279434368275438e-05, + "loss": 2.6709, + "step": 318600 + }, + { + "epoch": 8.16, + "learning_rate": 2.2785804556477782e-05, + "loss": 2.5509, + "step": 318700 + }, + { + "epoch": 8.17, + "learning_rate": 2.2777265430201184e-05, + "loss": 2.5017, + "step": 318800 + }, + { + "epoch": 8.17, + "learning_rate": 2.2768726303924585e-05, + "loss": 2.483, + "step": 318900 + }, + { + "epoch": 8.17, + "learning_rate": 2.2760187177647983e-05, + "loss": 2.7111, + "step": 319000 + }, + { + "epoch": 8.17, + "learning_rate": 2.2751648051371385e-05, + "loss": 2.5306, + "step": 319100 + }, + { + "epoch": 8.18, + "learning_rate": 2.2743108925094786e-05, + "loss": 2.6905, + "step": 319200 + }, + { + "epoch": 8.18, + "learning_rate": 2.2734569798818188e-05, + "loss": 2.5214, + "step": 319300 + }, + { + "epoch": 8.18, + "learning_rate": 2.2726030672541586e-05, + "loss": 2.5986, + "step": 319400 + }, + { + "epoch": 8.18, + "learning_rate": 2.2717491546264988e-05, + "loss": 2.4584, + "step": 319500 + }, + { + "epoch": 8.19, + "learning_rate": 2.270895241998839e-05, + "loss": 2.4888, + "step": 319600 + }, + { + "epoch": 8.19, + "learning_rate": 2.270041329371179e-05, + "loss": 2.655, + "step": 319700 + }, + { + "epoch": 8.19, + "learning_rate": 2.269187416743519e-05, + "loss": 2.4383, + "step": 319800 + }, + { + "epoch": 8.19, + "learning_rate": 2.268333504115859e-05, + "loss": 2.557, + "step": 319900 + }, + { + "epoch": 8.2, + "learning_rate": 2.2674795914881992e-05, + "loss": 2.6075, + "step": 320000 + }, + { + "epoch": 8.2, + "learning_rate": 2.266625678860539e-05, + "loss": 2.6676, + "step": 320100 + }, + { + "epoch": 8.2, + "learning_rate": 2.2657717662328792e-05, + "loss": 2.5704, + "step": 320200 + }, + { + "epoch": 8.21, + "learning_rate": 2.2649178536052193e-05, + "loss": 2.5918, + "step": 320300 + }, + { + "epoch": 8.21, + "learning_rate": 2.2640639409775595e-05, + "loss": 2.6089, + "step": 320400 + }, + { + "epoch": 8.21, + "learning_rate": 2.2632100283498993e-05, + "loss": 2.5962, + "step": 320500 + }, + { + "epoch": 8.21, + "learning_rate": 2.2623561157222395e-05, + "loss": 2.5116, + "step": 320600 + }, + { + "epoch": 8.22, + "learning_rate": 2.2615022030945796e-05, + "loss": 2.6161, + "step": 320700 + }, + { + "epoch": 8.22, + "learning_rate": 2.2606482904669198e-05, + "loss": 2.4271, + "step": 320800 + }, + { + "epoch": 8.22, + "learning_rate": 2.2597943778392596e-05, + "loss": 2.5744, + "step": 320900 + }, + { + "epoch": 8.22, + "learning_rate": 2.2589404652115997e-05, + "loss": 2.4381, + "step": 321000 + }, + { + "epoch": 8.23, + "learning_rate": 2.25808655258394e-05, + "loss": 2.4659, + "step": 321100 + }, + { + "epoch": 8.23, + "learning_rate": 2.2572326399562797e-05, + "loss": 2.566, + "step": 321200 + }, + { + "epoch": 8.23, + "learning_rate": 2.25637872732862e-05, + "loss": 2.5267, + "step": 321300 + }, + { + "epoch": 8.23, + "learning_rate": 2.25552481470096e-05, + "loss": 2.6958, + "step": 321400 + }, + { + "epoch": 8.24, + "learning_rate": 2.2546709020733002e-05, + "loss": 2.4422, + "step": 321500 + }, + { + "epoch": 8.24, + "learning_rate": 2.25381698944564e-05, + "loss": 2.3804, + "step": 321600 + }, + { + "epoch": 8.24, + "learning_rate": 2.25296307681798e-05, + "loss": 2.5386, + "step": 321700 + }, + { + "epoch": 8.24, + "learning_rate": 2.2521091641903203e-05, + "loss": 2.5558, + "step": 321800 + }, + { + "epoch": 8.25, + "learning_rate": 2.2512552515626605e-05, + "loss": 2.5719, + "step": 321900 + }, + { + "epoch": 8.25, + "learning_rate": 2.2504013389350003e-05, + "loss": 2.509, + "step": 322000 + }, + { + "epoch": 8.25, + "learning_rate": 2.2495474263073404e-05, + "loss": 2.4556, + "step": 322100 + }, + { + "epoch": 8.25, + "learning_rate": 2.2486935136796806e-05, + "loss": 2.6549, + "step": 322200 + }, + { + "epoch": 8.26, + "learning_rate": 2.2478396010520207e-05, + "loss": 2.4002, + "step": 322300 + }, + { + "epoch": 8.26, + "learning_rate": 2.2469856884243605e-05, + "loss": 2.4812, + "step": 322400 + }, + { + "epoch": 8.26, + "learning_rate": 2.2461317757967007e-05, + "loss": 2.6121, + "step": 322500 + }, + { + "epoch": 8.26, + "learning_rate": 2.245277863169041e-05, + "loss": 2.5983, + "step": 322600 + }, + { + "epoch": 8.27, + "learning_rate": 2.2444239505413807e-05, + "loss": 2.4197, + "step": 322700 + }, + { + "epoch": 8.27, + "learning_rate": 2.2435700379137208e-05, + "loss": 2.733, + "step": 322800 + }, + { + "epoch": 8.27, + "learning_rate": 2.242716125286061e-05, + "loss": 2.6397, + "step": 322900 + }, + { + "epoch": 8.27, + "learning_rate": 2.241862212658401e-05, + "loss": 2.3667, + "step": 323000 + }, + { + "epoch": 8.28, + "learning_rate": 2.241008300030741e-05, + "loss": 2.5528, + "step": 323100 + }, + { + "epoch": 8.28, + "learning_rate": 2.240154387403081e-05, + "loss": 2.515, + "step": 323200 + }, + { + "epoch": 8.28, + "learning_rate": 2.2393004747754213e-05, + "loss": 2.5222, + "step": 323300 + }, + { + "epoch": 8.28, + "learning_rate": 2.2384465621477614e-05, + "loss": 2.5107, + "step": 323400 + }, + { + "epoch": 8.29, + "learning_rate": 2.2375926495201012e-05, + "loss": 2.5237, + "step": 323500 + }, + { + "epoch": 8.29, + "learning_rate": 2.2367387368924414e-05, + "loss": 2.4484, + "step": 323600 + }, + { + "epoch": 8.29, + "learning_rate": 2.2358848242647815e-05, + "loss": 2.5927, + "step": 323700 + }, + { + "epoch": 8.29, + "learning_rate": 2.2350309116371214e-05, + "loss": 2.3367, + "step": 323800 + }, + { + "epoch": 8.3, + "learning_rate": 2.2341769990094615e-05, + "loss": 2.5768, + "step": 323900 + }, + { + "epoch": 8.3, + "learning_rate": 2.2333230863818017e-05, + "loss": 2.6143, + "step": 324000 + }, + { + "epoch": 8.3, + "learning_rate": 2.2324691737541418e-05, + "loss": 2.4861, + "step": 324100 + }, + { + "epoch": 8.31, + "learning_rate": 2.2316152611264816e-05, + "loss": 2.6254, + "step": 324200 + }, + { + "epoch": 8.31, + "learning_rate": 2.2307613484988218e-05, + "loss": 2.6897, + "step": 324300 + }, + { + "epoch": 8.31, + "learning_rate": 2.2299074358711616e-05, + "loss": 2.451, + "step": 324400 + }, + { + "epoch": 8.31, + "learning_rate": 2.2290535232435018e-05, + "loss": 2.5221, + "step": 324500 + }, + { + "epoch": 8.32, + "learning_rate": 2.228199610615842e-05, + "loss": 2.5239, + "step": 324600 + }, + { + "epoch": 8.32, + "learning_rate": 2.2273456979881817e-05, + "loss": 2.5147, + "step": 324700 + }, + { + "epoch": 8.32, + "learning_rate": 2.226491785360522e-05, + "loss": 2.5661, + "step": 324800 + }, + { + "epoch": 8.32, + "learning_rate": 2.225637872732862e-05, + "loss": 2.5493, + "step": 324900 + }, + { + "epoch": 8.33, + "learning_rate": 2.224783960105202e-05, + "loss": 2.731, + "step": 325000 + }, + { + "epoch": 8.33, + "learning_rate": 2.223930047477542e-05, + "loss": 2.5801, + "step": 325100 + }, + { + "epoch": 8.33, + "learning_rate": 2.2230761348498822e-05, + "loss": 2.3902, + "step": 325200 + }, + { + "epoch": 8.33, + "learning_rate": 2.2222222222222223e-05, + "loss": 2.6621, + "step": 325300 + }, + { + "epoch": 8.34, + "learning_rate": 2.221368309594562e-05, + "loss": 2.7346, + "step": 325400 + }, + { + "epoch": 8.34, + "learning_rate": 2.2205143969669023e-05, + "loss": 2.4395, + "step": 325500 + }, + { + "epoch": 8.34, + "learning_rate": 2.2196604843392425e-05, + "loss": 2.4414, + "step": 325600 + }, + { + "epoch": 8.34, + "learning_rate": 2.2188065717115826e-05, + "loss": 2.4889, + "step": 325700 + }, + { + "epoch": 8.35, + "learning_rate": 2.2179526590839224e-05, + "loss": 2.5056, + "step": 325800 + }, + { + "epoch": 8.35, + "learning_rate": 2.2170987464562626e-05, + "loss": 2.6284, + "step": 325900 + }, + { + "epoch": 8.35, + "learning_rate": 2.2162448338286027e-05, + "loss": 2.5589, + "step": 326000 + }, + { + "epoch": 8.35, + "learning_rate": 2.2153909212009426e-05, + "loss": 2.5782, + "step": 326100 + }, + { + "epoch": 8.36, + "learning_rate": 2.2145370085732827e-05, + "loss": 2.6423, + "step": 326200 + }, + { + "epoch": 8.36, + "learning_rate": 2.213683095945623e-05, + "loss": 2.5087, + "step": 326300 + }, + { + "epoch": 8.36, + "learning_rate": 2.212829183317963e-05, + "loss": 2.4429, + "step": 326400 + }, + { + "epoch": 8.36, + "learning_rate": 2.211975270690303e-05, + "loss": 2.5103, + "step": 326500 + }, + { + "epoch": 8.37, + "learning_rate": 2.211121358062643e-05, + "loss": 2.6208, + "step": 326600 + }, + { + "epoch": 8.37, + "learning_rate": 2.210267445434983e-05, + "loss": 2.5342, + "step": 326700 + }, + { + "epoch": 8.37, + "learning_rate": 2.2094135328073233e-05, + "loss": 2.3838, + "step": 326800 + }, + { + "epoch": 8.37, + "learning_rate": 2.208559620179663e-05, + "loss": 2.4461, + "step": 326900 + }, + { + "epoch": 8.38, + "learning_rate": 2.2077057075520033e-05, + "loss": 2.4148, + "step": 327000 + }, + { + "epoch": 8.38, + "learning_rate": 2.2068517949243434e-05, + "loss": 2.5646, + "step": 327100 + }, + { + "epoch": 8.38, + "learning_rate": 2.2059978822966836e-05, + "loss": 2.4863, + "step": 327200 + }, + { + "epoch": 8.38, + "learning_rate": 2.2051439696690234e-05, + "loss": 2.5528, + "step": 327300 + }, + { + "epoch": 8.39, + "learning_rate": 2.2042900570413635e-05, + "loss": 2.5117, + "step": 327400 + }, + { + "epoch": 8.39, + "learning_rate": 2.2034361444137037e-05, + "loss": 2.5939, + "step": 327500 + }, + { + "epoch": 8.39, + "learning_rate": 2.2025822317860435e-05, + "loss": 2.4854, + "step": 327600 + }, + { + "epoch": 8.39, + "learning_rate": 2.2017283191583837e-05, + "loss": 2.6956, + "step": 327700 + }, + { + "epoch": 8.4, + "learning_rate": 2.2008744065307238e-05, + "loss": 2.7891, + "step": 327800 + }, + { + "epoch": 8.4, + "learning_rate": 2.200020493903064e-05, + "loss": 2.6001, + "step": 327900 + }, + { + "epoch": 8.4, + "learning_rate": 2.1991665812754038e-05, + "loss": 2.5006, + "step": 328000 + }, + { + "epoch": 8.41, + "learning_rate": 2.198312668647744e-05, + "loss": 2.5285, + "step": 328100 + }, + { + "epoch": 8.41, + "learning_rate": 2.197458756020084e-05, + "loss": 2.3425, + "step": 328200 + }, + { + "epoch": 8.41, + "learning_rate": 2.1966048433924243e-05, + "loss": 2.3846, + "step": 328300 + }, + { + "epoch": 8.41, + "learning_rate": 2.195750930764764e-05, + "loss": 2.4363, + "step": 328400 + }, + { + "epoch": 8.42, + "learning_rate": 2.1948970181371042e-05, + "loss": 2.4927, + "step": 328500 + }, + { + "epoch": 8.42, + "learning_rate": 2.1940431055094444e-05, + "loss": 2.5043, + "step": 328600 + }, + { + "epoch": 8.42, + "learning_rate": 2.1931891928817842e-05, + "loss": 2.4685, + "step": 328700 + }, + { + "epoch": 8.42, + "learning_rate": 2.1923352802541244e-05, + "loss": 2.5255, + "step": 328800 + }, + { + "epoch": 8.43, + "learning_rate": 2.1914813676264645e-05, + "loss": 2.4924, + "step": 328900 + }, + { + "epoch": 8.43, + "learning_rate": 2.1906274549988047e-05, + "loss": 2.5157, + "step": 329000 + }, + { + "epoch": 8.43, + "learning_rate": 2.1897735423711445e-05, + "loss": 2.6053, + "step": 329100 + }, + { + "epoch": 8.43, + "learning_rate": 2.1889196297434846e-05, + "loss": 2.3203, + "step": 329200 + }, + { + "epoch": 8.44, + "learning_rate": 2.1880657171158248e-05, + "loss": 2.6072, + "step": 329300 + }, + { + "epoch": 8.44, + "learning_rate": 2.187211804488165e-05, + "loss": 2.4769, + "step": 329400 + }, + { + "epoch": 8.44, + "learning_rate": 2.1863578918605048e-05, + "loss": 2.7022, + "step": 329500 + }, + { + "epoch": 8.44, + "learning_rate": 2.185503979232845e-05, + "loss": 2.6139, + "step": 329600 + }, + { + "epoch": 8.45, + "learning_rate": 2.184650066605185e-05, + "loss": 2.6004, + "step": 329700 + }, + { + "epoch": 8.45, + "learning_rate": 2.1837961539775252e-05, + "loss": 2.5469, + "step": 329800 + }, + { + "epoch": 8.45, + "learning_rate": 2.182942241349865e-05, + "loss": 2.6072, + "step": 329900 + }, + { + "epoch": 8.45, + "learning_rate": 2.1820883287222052e-05, + "loss": 2.3999, + "step": 330000 + }, + { + "epoch": 8.46, + "learning_rate": 2.1812344160945454e-05, + "loss": 2.6583, + "step": 330100 + }, + { + "epoch": 8.46, + "learning_rate": 2.1803805034668852e-05, + "loss": 2.4339, + "step": 330200 + }, + { + "epoch": 8.46, + "learning_rate": 2.1795265908392253e-05, + "loss": 2.5644, + "step": 330300 + }, + { + "epoch": 8.46, + "learning_rate": 2.1786726782115655e-05, + "loss": 2.4128, + "step": 330400 + }, + { + "epoch": 8.47, + "learning_rate": 2.1778187655839056e-05, + "loss": 2.5575, + "step": 330500 + }, + { + "epoch": 8.47, + "learning_rate": 2.1769648529562455e-05, + "loss": 2.6073, + "step": 330600 + }, + { + "epoch": 8.47, + "learning_rate": 2.1761109403285856e-05, + "loss": 2.5025, + "step": 330700 + }, + { + "epoch": 8.47, + "learning_rate": 2.1752570277009258e-05, + "loss": 2.5701, + "step": 330800 + }, + { + "epoch": 8.48, + "learning_rate": 2.174403115073266e-05, + "loss": 2.5583, + "step": 330900 + }, + { + "epoch": 8.48, + "learning_rate": 2.1735492024456057e-05, + "loss": 2.5079, + "step": 331000 + }, + { + "epoch": 8.48, + "learning_rate": 2.172695289817946e-05, + "loss": 2.591, + "step": 331100 + }, + { + "epoch": 8.48, + "learning_rate": 2.171841377190286e-05, + "loss": 2.5121, + "step": 331200 + }, + { + "epoch": 8.49, + "learning_rate": 2.1709874645626262e-05, + "loss": 2.5391, + "step": 331300 + }, + { + "epoch": 8.49, + "learning_rate": 2.170133551934966e-05, + "loss": 2.6695, + "step": 331400 + }, + { + "epoch": 8.49, + "learning_rate": 2.169279639307306e-05, + "loss": 2.2796, + "step": 331500 + }, + { + "epoch": 8.49, + "learning_rate": 2.1684257266796463e-05, + "loss": 2.6824, + "step": 331600 + }, + { + "epoch": 8.5, + "learning_rate": 2.167571814051986e-05, + "loss": 2.6698, + "step": 331700 + }, + { + "epoch": 8.5, + "learning_rate": 2.1667179014243263e-05, + "loss": 2.5086, + "step": 331800 + }, + { + "epoch": 8.5, + "learning_rate": 2.1658639887966664e-05, + "loss": 2.6209, + "step": 331900 + }, + { + "epoch": 8.5, + "learning_rate": 2.1650100761690066e-05, + "loss": 2.5076, + "step": 332000 + }, + { + "epoch": 8.51, + "learning_rate": 2.1641561635413464e-05, + "loss": 2.5979, + "step": 332100 + }, + { + "epoch": 8.51, + "learning_rate": 2.1633022509136866e-05, + "loss": 2.4665, + "step": 332200 + }, + { + "epoch": 8.51, + "learning_rate": 2.1624483382860267e-05, + "loss": 2.5746, + "step": 332300 + }, + { + "epoch": 8.52, + "learning_rate": 2.161594425658367e-05, + "loss": 2.3524, + "step": 332400 + }, + { + "epoch": 8.52, + "learning_rate": 2.1607405130307067e-05, + "loss": 2.3685, + "step": 332500 + }, + { + "epoch": 8.52, + "learning_rate": 2.159886600403047e-05, + "loss": 2.5041, + "step": 332600 + }, + { + "epoch": 8.52, + "learning_rate": 2.159032687775387e-05, + "loss": 2.5543, + "step": 332700 + }, + { + "epoch": 8.53, + "learning_rate": 2.1581787751477268e-05, + "loss": 2.5392, + "step": 332800 + }, + { + "epoch": 8.53, + "learning_rate": 2.157324862520067e-05, + "loss": 2.41, + "step": 332900 + }, + { + "epoch": 8.53, + "learning_rate": 2.156470949892407e-05, + "loss": 2.4839, + "step": 333000 + }, + { + "epoch": 8.53, + "learning_rate": 2.1556170372647473e-05, + "loss": 2.5286, + "step": 333100 + }, + { + "epoch": 8.54, + "learning_rate": 2.154763124637087e-05, + "loss": 2.3548, + "step": 333200 + }, + { + "epoch": 8.54, + "learning_rate": 2.1539092120094273e-05, + "loss": 2.6003, + "step": 333300 + }, + { + "epoch": 8.54, + "learning_rate": 2.1530552993817674e-05, + "loss": 2.5088, + "step": 333400 + }, + { + "epoch": 8.54, + "learning_rate": 2.1522013867541076e-05, + "loss": 2.5957, + "step": 333500 + }, + { + "epoch": 8.55, + "learning_rate": 2.1513474741264474e-05, + "loss": 2.589, + "step": 333600 + }, + { + "epoch": 8.55, + "learning_rate": 2.1504935614987875e-05, + "loss": 2.4592, + "step": 333700 + }, + { + "epoch": 8.55, + "learning_rate": 2.1496396488711277e-05, + "loss": 2.5387, + "step": 333800 + }, + { + "epoch": 8.55, + "learning_rate": 2.148785736243468e-05, + "loss": 2.4205, + "step": 333900 + }, + { + "epoch": 8.56, + "learning_rate": 2.1479318236158077e-05, + "loss": 2.5185, + "step": 334000 + }, + { + "epoch": 8.56, + "learning_rate": 2.1470779109881478e-05, + "loss": 2.437, + "step": 334100 + }, + { + "epoch": 8.56, + "learning_rate": 2.146223998360488e-05, + "loss": 2.4597, + "step": 334200 + }, + { + "epoch": 8.56, + "learning_rate": 2.1453700857328278e-05, + "loss": 2.419, + "step": 334300 + }, + { + "epoch": 8.57, + "learning_rate": 2.144516173105168e-05, + "loss": 2.3632, + "step": 334400 + }, + { + "epoch": 8.57, + "learning_rate": 2.143662260477508e-05, + "loss": 2.458, + "step": 334500 + }, + { + "epoch": 8.57, + "learning_rate": 2.1428083478498483e-05, + "loss": 2.4865, + "step": 334600 + }, + { + "epoch": 8.57, + "learning_rate": 2.141954435222188e-05, + "loss": 2.647, + "step": 334700 + }, + { + "epoch": 8.58, + "learning_rate": 2.1411005225945282e-05, + "loss": 2.6055, + "step": 334800 + }, + { + "epoch": 8.58, + "learning_rate": 2.1402466099668684e-05, + "loss": 2.4032, + "step": 334900 + }, + { + "epoch": 8.58, + "learning_rate": 2.1393926973392085e-05, + "loss": 2.5299, + "step": 335000 + }, + { + "epoch": 8.58, + "learning_rate": 2.1385387847115484e-05, + "loss": 2.4994, + "step": 335100 + }, + { + "epoch": 8.59, + "learning_rate": 2.1376848720838885e-05, + "loss": 2.5819, + "step": 335200 + }, + { + "epoch": 8.59, + "learning_rate": 2.1368309594562287e-05, + "loss": 2.3387, + "step": 335300 + }, + { + "epoch": 8.59, + "learning_rate": 2.1359770468285685e-05, + "loss": 2.5313, + "step": 335400 + }, + { + "epoch": 8.59, + "learning_rate": 2.1351231342009086e-05, + "loss": 2.5869, + "step": 335500 + }, + { + "epoch": 8.6, + "learning_rate": 2.1342692215732488e-05, + "loss": 2.475, + "step": 335600 + }, + { + "epoch": 8.6, + "learning_rate": 2.133415308945589e-05, + "loss": 2.5618, + "step": 335700 + }, + { + "epoch": 8.6, + "learning_rate": 2.1325613963179288e-05, + "loss": 2.6261, + "step": 335800 + }, + { + "epoch": 8.6, + "learning_rate": 2.131707483690269e-05, + "loss": 2.6525, + "step": 335900 + }, + { + "epoch": 8.61, + "learning_rate": 2.130853571062609e-05, + "loss": 2.67, + "step": 336000 + }, + { + "epoch": 8.61, + "learning_rate": 2.1299996584349492e-05, + "loss": 2.5276, + "step": 336100 + }, + { + "epoch": 8.61, + "learning_rate": 2.129145745807289e-05, + "loss": 2.5303, + "step": 336200 + }, + { + "epoch": 8.62, + "learning_rate": 2.1282918331796292e-05, + "loss": 2.4886, + "step": 336300 + }, + { + "epoch": 8.62, + "learning_rate": 2.1274379205519693e-05, + "loss": 2.4955, + "step": 336400 + }, + { + "epoch": 8.62, + "learning_rate": 2.1265840079243095e-05, + "loss": 2.5971, + "step": 336500 + }, + { + "epoch": 8.62, + "learning_rate": 2.1257300952966493e-05, + "loss": 2.5558, + "step": 336600 + }, + { + "epoch": 8.63, + "learning_rate": 2.1248761826689895e-05, + "loss": 2.5548, + "step": 336700 + }, + { + "epoch": 8.63, + "learning_rate": 2.1240222700413296e-05, + "loss": 2.4898, + "step": 336800 + }, + { + "epoch": 8.63, + "learning_rate": 2.1231683574136694e-05, + "loss": 2.5943, + "step": 336900 + }, + { + "epoch": 8.63, + "learning_rate": 2.1223144447860096e-05, + "loss": 2.5476, + "step": 337000 + }, + { + "epoch": 8.64, + "learning_rate": 2.1214605321583498e-05, + "loss": 2.3956, + "step": 337100 + }, + { + "epoch": 8.64, + "learning_rate": 2.12060661953069e-05, + "loss": 2.5946, + "step": 337200 + }, + { + "epoch": 8.64, + "learning_rate": 2.1197527069030297e-05, + "loss": 2.5176, + "step": 337300 + }, + { + "epoch": 8.64, + "learning_rate": 2.11889879427537e-05, + "loss": 2.5441, + "step": 337400 + }, + { + "epoch": 8.65, + "learning_rate": 2.11804488164771e-05, + "loss": 2.4069, + "step": 337500 + }, + { + "epoch": 8.65, + "learning_rate": 2.1171909690200502e-05, + "loss": 2.4024, + "step": 337600 + }, + { + "epoch": 8.65, + "learning_rate": 2.11633705639239e-05, + "loss": 2.5113, + "step": 337700 + }, + { + "epoch": 8.65, + "learning_rate": 2.11548314376473e-05, + "loss": 2.5701, + "step": 337800 + }, + { + "epoch": 8.66, + "learning_rate": 2.1146292311370703e-05, + "loss": 2.4322, + "step": 337900 + }, + { + "epoch": 8.66, + "learning_rate": 2.11377531850941e-05, + "loss": 2.4589, + "step": 338000 + }, + { + "epoch": 8.66, + "learning_rate": 2.1129214058817503e-05, + "loss": 2.6578, + "step": 338100 + }, + { + "epoch": 8.66, + "learning_rate": 2.1120674932540904e-05, + "loss": 2.4323, + "step": 338200 + }, + { + "epoch": 8.67, + "learning_rate": 2.1112135806264306e-05, + "loss": 2.4691, + "step": 338300 + }, + { + "epoch": 8.67, + "learning_rate": 2.1103596679987704e-05, + "loss": 2.5593, + "step": 338400 + }, + { + "epoch": 8.67, + "learning_rate": 2.1095057553711106e-05, + "loss": 2.6265, + "step": 338500 + }, + { + "epoch": 8.67, + "learning_rate": 2.1086518427434507e-05, + "loss": 2.6088, + "step": 338600 + }, + { + "epoch": 8.68, + "learning_rate": 2.107797930115791e-05, + "loss": 2.1967, + "step": 338700 + }, + { + "epoch": 8.68, + "learning_rate": 2.1069440174881307e-05, + "loss": 2.5874, + "step": 338800 + }, + { + "epoch": 8.68, + "learning_rate": 2.106090104860471e-05, + "loss": 2.3878, + "step": 338900 + }, + { + "epoch": 8.68, + "learning_rate": 2.105236192232811e-05, + "loss": 2.4317, + "step": 339000 + }, + { + "epoch": 8.69, + "learning_rate": 2.104382279605151e-05, + "loss": 2.4738, + "step": 339100 + }, + { + "epoch": 8.69, + "learning_rate": 2.103528366977491e-05, + "loss": 2.5272, + "step": 339200 + }, + { + "epoch": 8.69, + "learning_rate": 2.102674454349831e-05, + "loss": 2.5015, + "step": 339300 + }, + { + "epoch": 8.69, + "learning_rate": 2.1018205417221713e-05, + "loss": 2.521, + "step": 339400 + }, + { + "epoch": 8.7, + "learning_rate": 2.100966629094511e-05, + "loss": 2.5521, + "step": 339500 + }, + { + "epoch": 8.7, + "learning_rate": 2.1001127164668513e-05, + "loss": 2.58, + "step": 339600 + }, + { + "epoch": 8.7, + "learning_rate": 2.0992588038391914e-05, + "loss": 2.5704, + "step": 339700 + }, + { + "epoch": 8.7, + "learning_rate": 2.0984048912115316e-05, + "loss": 2.588, + "step": 339800 + }, + { + "epoch": 8.71, + "learning_rate": 2.0975509785838714e-05, + "loss": 2.6017, + "step": 339900 + }, + { + "epoch": 8.71, + "learning_rate": 2.0966970659562115e-05, + "loss": 2.5664, + "step": 340000 + }, + { + "epoch": 8.71, + "learning_rate": 2.0958431533285517e-05, + "loss": 2.493, + "step": 340100 + }, + { + "epoch": 8.72, + "learning_rate": 2.094989240700892e-05, + "loss": 2.6741, + "step": 340200 + }, + { + "epoch": 8.72, + "learning_rate": 2.0941353280732317e-05, + "loss": 2.2934, + "step": 340300 + }, + { + "epoch": 8.72, + "learning_rate": 2.0932814154455718e-05, + "loss": 2.4713, + "step": 340400 + }, + { + "epoch": 8.72, + "learning_rate": 2.092427502817912e-05, + "loss": 2.6073, + "step": 340500 + }, + { + "epoch": 8.73, + "learning_rate": 2.0915735901902518e-05, + "loss": 2.5486, + "step": 340600 + }, + { + "epoch": 8.73, + "learning_rate": 2.090719677562592e-05, + "loss": 2.5145, + "step": 340700 + }, + { + "epoch": 8.73, + "learning_rate": 2.089865764934932e-05, + "loss": 2.592, + "step": 340800 + }, + { + "epoch": 8.73, + "learning_rate": 2.0890118523072723e-05, + "loss": 2.7031, + "step": 340900 + }, + { + "epoch": 8.74, + "learning_rate": 2.088157939679612e-05, + "loss": 2.3675, + "step": 341000 + }, + { + "epoch": 8.74, + "learning_rate": 2.0873040270519522e-05, + "loss": 2.6031, + "step": 341100 + }, + { + "epoch": 8.74, + "learning_rate": 2.0864501144242924e-05, + "loss": 2.4581, + "step": 341200 + }, + { + "epoch": 8.74, + "learning_rate": 2.0855962017966325e-05, + "loss": 2.6458, + "step": 341300 + }, + { + "epoch": 8.75, + "learning_rate": 2.0847422891689723e-05, + "loss": 2.6926, + "step": 341400 + }, + { + "epoch": 8.75, + "learning_rate": 2.0838883765413125e-05, + "loss": 2.5385, + "step": 341500 + }, + { + "epoch": 8.75, + "learning_rate": 2.0830344639136527e-05, + "loss": 2.5288, + "step": 341600 + }, + { + "epoch": 8.75, + "learning_rate": 2.0821805512859928e-05, + "loss": 2.4888, + "step": 341700 + }, + { + "epoch": 8.76, + "learning_rate": 2.0813266386583326e-05, + "loss": 2.5076, + "step": 341800 + }, + { + "epoch": 8.76, + "learning_rate": 2.0804727260306724e-05, + "loss": 2.3994, + "step": 341900 + }, + { + "epoch": 8.76, + "learning_rate": 2.0796188134030126e-05, + "loss": 2.6172, + "step": 342000 + }, + { + "epoch": 8.76, + "learning_rate": 2.0787649007753528e-05, + "loss": 2.5673, + "step": 342100 + }, + { + "epoch": 8.77, + "learning_rate": 2.0779109881476926e-05, + "loss": 2.519, + "step": 342200 + }, + { + "epoch": 8.77, + "learning_rate": 2.0770570755200327e-05, + "loss": 2.6818, + "step": 342300 + }, + { + "epoch": 8.77, + "learning_rate": 2.076203162892373e-05, + "loss": 2.4862, + "step": 342400 + }, + { + "epoch": 8.77, + "learning_rate": 2.075349250264713e-05, + "loss": 2.4133, + "step": 342500 + }, + { + "epoch": 8.78, + "learning_rate": 2.074495337637053e-05, + "loss": 2.6491, + "step": 342600 + }, + { + "epoch": 8.78, + "learning_rate": 2.073641425009393e-05, + "loss": 2.3798, + "step": 342700 + }, + { + "epoch": 8.78, + "learning_rate": 2.072787512381733e-05, + "loss": 2.4769, + "step": 342800 + }, + { + "epoch": 8.78, + "learning_rate": 2.071933599754073e-05, + "loss": 2.5758, + "step": 342900 + }, + { + "epoch": 8.79, + "learning_rate": 2.071079687126413e-05, + "loss": 2.5529, + "step": 343000 + }, + { + "epoch": 8.79, + "learning_rate": 2.0702257744987533e-05, + "loss": 2.288, + "step": 343100 + }, + { + "epoch": 8.79, + "learning_rate": 2.0693718618710934e-05, + "loss": 2.5995, + "step": 343200 + }, + { + "epoch": 8.79, + "learning_rate": 2.0685179492434333e-05, + "loss": 2.4792, + "step": 343300 + }, + { + "epoch": 8.8, + "learning_rate": 2.0676640366157734e-05, + "loss": 2.4033, + "step": 343400 + }, + { + "epoch": 8.8, + "learning_rate": 2.0668101239881136e-05, + "loss": 2.5598, + "step": 343500 + }, + { + "epoch": 8.8, + "learning_rate": 2.0659562113604537e-05, + "loss": 2.5351, + "step": 343600 + }, + { + "epoch": 8.8, + "learning_rate": 2.0651022987327935e-05, + "loss": 2.3781, + "step": 343700 + }, + { + "epoch": 8.81, + "learning_rate": 2.0642483861051337e-05, + "loss": 2.2974, + "step": 343800 + }, + { + "epoch": 8.81, + "learning_rate": 2.063394473477474e-05, + "loss": 2.5745, + "step": 343900 + }, + { + "epoch": 8.81, + "learning_rate": 2.062540560849814e-05, + "loss": 2.4677, + "step": 344000 + }, + { + "epoch": 8.81, + "learning_rate": 2.0616866482221538e-05, + "loss": 2.4528, + "step": 344100 + }, + { + "epoch": 8.82, + "learning_rate": 2.060832735594494e-05, + "loss": 2.524, + "step": 344200 + }, + { + "epoch": 8.82, + "learning_rate": 2.059978822966834e-05, + "loss": 2.5958, + "step": 344300 + }, + { + "epoch": 8.82, + "learning_rate": 2.059124910339174e-05, + "loss": 2.6228, + "step": 344400 + }, + { + "epoch": 8.83, + "learning_rate": 2.058270997711514e-05, + "loss": 2.5172, + "step": 344500 + }, + { + "epoch": 8.83, + "learning_rate": 2.0574170850838543e-05, + "loss": 2.5492, + "step": 344600 + }, + { + "epoch": 8.83, + "learning_rate": 2.0565631724561944e-05, + "loss": 2.5259, + "step": 344700 + }, + { + "epoch": 8.83, + "learning_rate": 2.0557092598285342e-05, + "loss": 2.494, + "step": 344800 + }, + { + "epoch": 8.84, + "learning_rate": 2.0548553472008744e-05, + "loss": 2.601, + "step": 344900 + }, + { + "epoch": 8.84, + "learning_rate": 2.0540014345732145e-05, + "loss": 2.4916, + "step": 345000 + }, + { + "epoch": 8.84, + "learning_rate": 2.0531475219455547e-05, + "loss": 2.6532, + "step": 345100 + }, + { + "epoch": 8.84, + "learning_rate": 2.0522936093178945e-05, + "loss": 2.574, + "step": 345200 + }, + { + "epoch": 8.85, + "learning_rate": 2.0514396966902347e-05, + "loss": 2.483, + "step": 345300 + }, + { + "epoch": 8.85, + "learning_rate": 2.0505857840625748e-05, + "loss": 2.4472, + "step": 345400 + }, + { + "epoch": 8.85, + "learning_rate": 2.0497318714349146e-05, + "loss": 2.5283, + "step": 345500 + }, + { + "epoch": 8.85, + "learning_rate": 2.0488779588072548e-05, + "loss": 2.4242, + "step": 345600 + }, + { + "epoch": 8.86, + "learning_rate": 2.048024046179595e-05, + "loss": 2.311, + "step": 345700 + }, + { + "epoch": 8.86, + "learning_rate": 2.047170133551935e-05, + "loss": 2.3901, + "step": 345800 + }, + { + "epoch": 8.86, + "learning_rate": 2.046316220924275e-05, + "loss": 2.4731, + "step": 345900 + }, + { + "epoch": 8.86, + "learning_rate": 2.045462308296615e-05, + "loss": 2.423, + "step": 346000 + }, + { + "epoch": 8.87, + "learning_rate": 2.0446083956689552e-05, + "loss": 2.5762, + "step": 346100 + }, + { + "epoch": 8.87, + "learning_rate": 2.0437544830412954e-05, + "loss": 2.5543, + "step": 346200 + }, + { + "epoch": 8.87, + "learning_rate": 2.0429005704136352e-05, + "loss": 2.5611, + "step": 346300 + }, + { + "epoch": 8.87, + "learning_rate": 2.0420466577859753e-05, + "loss": 2.5478, + "step": 346400 + }, + { + "epoch": 8.88, + "learning_rate": 2.0411927451583155e-05, + "loss": 2.5788, + "step": 346500 + }, + { + "epoch": 8.88, + "learning_rate": 2.0403388325306557e-05, + "loss": 2.6245, + "step": 346600 + }, + { + "epoch": 8.88, + "learning_rate": 2.0394849199029955e-05, + "loss": 2.4401, + "step": 346700 + }, + { + "epoch": 8.88, + "learning_rate": 2.0386310072753356e-05, + "loss": 2.3803, + "step": 346800 + }, + { + "epoch": 8.89, + "learning_rate": 2.0377770946476758e-05, + "loss": 2.4374, + "step": 346900 + }, + { + "epoch": 8.89, + "learning_rate": 2.0369231820200156e-05, + "loss": 2.7678, + "step": 347000 + }, + { + "epoch": 8.89, + "learning_rate": 2.0360692693923558e-05, + "loss": 2.513, + "step": 347100 + }, + { + "epoch": 8.89, + "learning_rate": 2.035215356764696e-05, + "loss": 2.646, + "step": 347200 + }, + { + "epoch": 8.9, + "learning_rate": 2.034361444137036e-05, + "loss": 2.4675, + "step": 347300 + }, + { + "epoch": 8.9, + "learning_rate": 2.033507531509376e-05, + "loss": 2.5015, + "step": 347400 + }, + { + "epoch": 8.9, + "learning_rate": 2.032653618881716e-05, + "loss": 2.3507, + "step": 347500 + }, + { + "epoch": 8.9, + "learning_rate": 2.0317997062540562e-05, + "loss": 2.4468, + "step": 347600 + }, + { + "epoch": 8.91, + "learning_rate": 2.0309457936263963e-05, + "loss": 2.4786, + "step": 347700 + }, + { + "epoch": 8.91, + "learning_rate": 2.030091880998736e-05, + "loss": 2.5348, + "step": 347800 + }, + { + "epoch": 8.91, + "learning_rate": 2.0292379683710763e-05, + "loss": 2.6023, + "step": 347900 + }, + { + "epoch": 8.91, + "learning_rate": 2.0283840557434165e-05, + "loss": 2.3332, + "step": 348000 + }, + { + "epoch": 8.92, + "learning_rate": 2.0275301431157566e-05, + "loss": 2.6247, + "step": 348100 + }, + { + "epoch": 8.92, + "learning_rate": 2.0266762304880964e-05, + "loss": 2.5805, + "step": 348200 + }, + { + "epoch": 8.92, + "learning_rate": 2.0258223178604366e-05, + "loss": 2.4321, + "step": 348300 + }, + { + "epoch": 8.93, + "learning_rate": 2.0249684052327767e-05, + "loss": 2.3058, + "step": 348400 + }, + { + "epoch": 8.93, + "learning_rate": 2.0241144926051166e-05, + "loss": 2.2775, + "step": 348500 + }, + { + "epoch": 8.93, + "learning_rate": 2.0232605799774567e-05, + "loss": 2.6537, + "step": 348600 + }, + { + "epoch": 8.93, + "learning_rate": 2.022406667349797e-05, + "loss": 2.6038, + "step": 348700 + }, + { + "epoch": 8.94, + "learning_rate": 2.021552754722137e-05, + "loss": 2.4554, + "step": 348800 + }, + { + "epoch": 8.94, + "learning_rate": 2.020698842094477e-05, + "loss": 2.4168, + "step": 348900 + }, + { + "epoch": 8.94, + "learning_rate": 2.019844929466817e-05, + "loss": 2.4945, + "step": 349000 + }, + { + "epoch": 8.94, + "learning_rate": 2.018991016839157e-05, + "loss": 2.4791, + "step": 349100 + }, + { + "epoch": 8.95, + "learning_rate": 2.0181371042114973e-05, + "loss": 2.3889, + "step": 349200 + }, + { + "epoch": 8.95, + "learning_rate": 2.017283191583837e-05, + "loss": 2.4123, + "step": 349300 + }, + { + "epoch": 8.95, + "learning_rate": 2.0164292789561773e-05, + "loss": 2.4595, + "step": 349400 + }, + { + "epoch": 8.95, + "learning_rate": 2.0155753663285174e-05, + "loss": 2.3611, + "step": 349500 + }, + { + "epoch": 8.96, + "learning_rate": 2.0147214537008572e-05, + "loss": 2.5504, + "step": 349600 + }, + { + "epoch": 8.96, + "learning_rate": 2.0138675410731974e-05, + "loss": 2.4024, + "step": 349700 + }, + { + "epoch": 8.96, + "learning_rate": 2.0130136284455376e-05, + "loss": 2.6099, + "step": 349800 + }, + { + "epoch": 8.96, + "learning_rate": 2.0121597158178777e-05, + "loss": 2.5468, + "step": 349900 + }, + { + "epoch": 8.97, + "learning_rate": 2.0113058031902175e-05, + "loss": 2.3558, + "step": 350000 + }, + { + "epoch": 8.97, + "learning_rate": 2.0104518905625577e-05, + "loss": 2.5213, + "step": 350100 + }, + { + "epoch": 8.97, + "learning_rate": 2.009597977934898e-05, + "loss": 2.3691, + "step": 350200 + }, + { + "epoch": 8.97, + "learning_rate": 2.008744065307238e-05, + "loss": 2.6007, + "step": 350300 + }, + { + "epoch": 8.98, + "learning_rate": 2.0078901526795778e-05, + "loss": 2.5096, + "step": 350400 + }, + { + "epoch": 8.98, + "learning_rate": 2.007036240051918e-05, + "loss": 2.5878, + "step": 350500 + }, + { + "epoch": 8.98, + "learning_rate": 2.006182327424258e-05, + "loss": 2.6019, + "step": 350600 + }, + { + "epoch": 8.98, + "learning_rate": 2.0053284147965983e-05, + "loss": 2.5281, + "step": 350700 + }, + { + "epoch": 8.99, + "learning_rate": 2.004474502168938e-05, + "loss": 2.3759, + "step": 350800 + }, + { + "epoch": 8.99, + "learning_rate": 2.0036205895412782e-05, + "loss": 2.4643, + "step": 350900 + }, + { + "epoch": 8.99, + "learning_rate": 2.0027666769136184e-05, + "loss": 2.2171, + "step": 351000 + }, + { + "epoch": 8.99, + "learning_rate": 2.0019127642859582e-05, + "loss": 2.5775, + "step": 351100 + }, + { + "epoch": 9.0, + "learning_rate": 2.0010588516582984e-05, + "loss": 2.36, + "step": 351200 + }, + { + "epoch": 9.0, + "learning_rate": 2.0002049390306385e-05, + "loss": 2.3243, + "step": 351300 + }, + { + "epoch": 9.0, + "learning_rate": 1.9993510264029787e-05, + "loss": 2.4882, + "step": 351400 + }, + { + "epoch": 9.0, + "learning_rate": 1.9984971137753185e-05, + "loss": 2.5443, + "step": 351500 + }, + { + "epoch": 9.01, + "learning_rate": 1.9976432011476587e-05, + "loss": 2.486, + "step": 351600 + }, + { + "epoch": 9.01, + "learning_rate": 1.9967892885199988e-05, + "loss": 2.3474, + "step": 351700 + }, + { + "epoch": 9.01, + "learning_rate": 1.995935375892339e-05, + "loss": 2.3448, + "step": 351800 + }, + { + "epoch": 9.01, + "learning_rate": 1.9950814632646788e-05, + "loss": 2.4835, + "step": 351900 + }, + { + "epoch": 9.02, + "learning_rate": 1.994227550637019e-05, + "loss": 2.3045, + "step": 352000 + }, + { + "epoch": 9.02, + "learning_rate": 1.993373638009359e-05, + "loss": 2.5893, + "step": 352100 + }, + { + "epoch": 9.02, + "learning_rate": 1.992519725381699e-05, + "loss": 2.5106, + "step": 352200 + }, + { + "epoch": 9.03, + "learning_rate": 1.991665812754039e-05, + "loss": 2.4664, + "step": 352300 + }, + { + "epoch": 9.03, + "learning_rate": 1.9908119001263792e-05, + "loss": 2.5472, + "step": 352400 + }, + { + "epoch": 9.03, + "learning_rate": 1.9899579874987194e-05, + "loss": 2.6176, + "step": 352500 + }, + { + "epoch": 9.03, + "learning_rate": 1.9891040748710592e-05, + "loss": 2.5066, + "step": 352600 + }, + { + "epoch": 9.04, + "learning_rate": 1.9882501622433993e-05, + "loss": 2.4152, + "step": 352700 + }, + { + "epoch": 9.04, + "learning_rate": 1.9873962496157395e-05, + "loss": 2.2691, + "step": 352800 + }, + { + "epoch": 9.04, + "learning_rate": 1.9865423369880796e-05, + "loss": 2.4565, + "step": 352900 + }, + { + "epoch": 9.04, + "learning_rate": 1.9856884243604195e-05, + "loss": 2.4942, + "step": 353000 + }, + { + "epoch": 9.05, + "learning_rate": 1.9848345117327596e-05, + "loss": 2.5106, + "step": 353100 + }, + { + "epoch": 9.05, + "learning_rate": 1.9839805991050998e-05, + "loss": 2.4466, + "step": 353200 + }, + { + "epoch": 9.05, + "learning_rate": 1.98312668647744e-05, + "loss": 2.5558, + "step": 353300 + }, + { + "epoch": 9.05, + "learning_rate": 1.9822727738497797e-05, + "loss": 2.7191, + "step": 353400 + }, + { + "epoch": 9.06, + "learning_rate": 1.98141886122212e-05, + "loss": 2.5976, + "step": 353500 + }, + { + "epoch": 9.06, + "learning_rate": 1.98056494859446e-05, + "loss": 2.425, + "step": 353600 + }, + { + "epoch": 9.06, + "learning_rate": 1.9797110359668e-05, + "loss": 2.3932, + "step": 353700 + }, + { + "epoch": 9.06, + "learning_rate": 1.97885712333914e-05, + "loss": 2.4325, + "step": 353800 + }, + { + "epoch": 9.07, + "learning_rate": 1.9780032107114802e-05, + "loss": 2.3956, + "step": 353900 + }, + { + "epoch": 9.07, + "learning_rate": 1.9771492980838203e-05, + "loss": 2.5554, + "step": 354000 + }, + { + "epoch": 9.07, + "learning_rate": 1.97629538545616e-05, + "loss": 2.3288, + "step": 354100 + }, + { + "epoch": 9.07, + "learning_rate": 1.9754414728285003e-05, + "loss": 2.5212, + "step": 354200 + }, + { + "epoch": 9.08, + "learning_rate": 1.9745875602008405e-05, + "loss": 2.4385, + "step": 354300 + }, + { + "epoch": 9.08, + "learning_rate": 1.9737336475731806e-05, + "loss": 2.5364, + "step": 354400 + }, + { + "epoch": 9.08, + "learning_rate": 1.9728797349455204e-05, + "loss": 2.5373, + "step": 354500 + }, + { + "epoch": 9.08, + "learning_rate": 1.9720258223178606e-05, + "loss": 2.3519, + "step": 354600 + }, + { + "epoch": 9.09, + "learning_rate": 1.9711719096902007e-05, + "loss": 2.6551, + "step": 354700 + }, + { + "epoch": 9.09, + "learning_rate": 1.9703179970625406e-05, + "loss": 2.4575, + "step": 354800 + }, + { + "epoch": 9.09, + "learning_rate": 1.9694640844348807e-05, + "loss": 2.3641, + "step": 354900 + }, + { + "epoch": 9.09, + "learning_rate": 1.968610171807221e-05, + "loss": 2.5151, + "step": 355000 + }, + { + "epoch": 9.1, + "learning_rate": 1.967756259179561e-05, + "loss": 2.4773, + "step": 355100 + }, + { + "epoch": 9.1, + "learning_rate": 1.966902346551901e-05, + "loss": 2.3488, + "step": 355200 + }, + { + "epoch": 9.1, + "learning_rate": 1.966048433924241e-05, + "loss": 2.5454, + "step": 355300 + }, + { + "epoch": 9.1, + "learning_rate": 1.965194521296581e-05, + "loss": 2.38, + "step": 355400 + }, + { + "epoch": 9.11, + "learning_rate": 1.9643406086689213e-05, + "loss": 2.3771, + "step": 355500 + }, + { + "epoch": 9.11, + "learning_rate": 1.963486696041261e-05, + "loss": 2.5326, + "step": 355600 + }, + { + "epoch": 9.11, + "learning_rate": 1.9626327834136013e-05, + "loss": 2.4057, + "step": 355700 + }, + { + "epoch": 9.11, + "learning_rate": 1.9617788707859414e-05, + "loss": 2.2275, + "step": 355800 + }, + { + "epoch": 9.12, + "learning_rate": 1.9609249581582816e-05, + "loss": 2.4473, + "step": 355900 + }, + { + "epoch": 9.12, + "learning_rate": 1.9600710455306214e-05, + "loss": 2.3546, + "step": 356000 + }, + { + "epoch": 9.12, + "learning_rate": 1.9592171329029616e-05, + "loss": 2.3716, + "step": 356100 + }, + { + "epoch": 9.12, + "learning_rate": 1.9583632202753017e-05, + "loss": 2.4092, + "step": 356200 + }, + { + "epoch": 9.13, + "learning_rate": 1.9575093076476415e-05, + "loss": 2.4315, + "step": 356300 + }, + { + "epoch": 9.13, + "learning_rate": 1.9566553950199817e-05, + "loss": 2.454, + "step": 356400 + }, + { + "epoch": 9.13, + "learning_rate": 1.955801482392322e-05, + "loss": 2.5591, + "step": 356500 + }, + { + "epoch": 9.14, + "learning_rate": 1.954947569764662e-05, + "loss": 2.4718, + "step": 356600 + }, + { + "epoch": 9.14, + "learning_rate": 1.9540936571370018e-05, + "loss": 2.2909, + "step": 356700 + }, + { + "epoch": 9.14, + "learning_rate": 1.953239744509342e-05, + "loss": 2.6231, + "step": 356800 + }, + { + "epoch": 9.14, + "learning_rate": 1.952385831881682e-05, + "loss": 2.3351, + "step": 356900 + }, + { + "epoch": 9.15, + "learning_rate": 1.9515319192540223e-05, + "loss": 2.5459, + "step": 357000 + }, + { + "epoch": 9.15, + "learning_rate": 1.950678006626362e-05, + "loss": 2.5014, + "step": 357100 + }, + { + "epoch": 9.15, + "learning_rate": 1.9498240939987022e-05, + "loss": 2.5505, + "step": 357200 + }, + { + "epoch": 9.15, + "learning_rate": 1.9489701813710424e-05, + "loss": 2.3888, + "step": 357300 + }, + { + "epoch": 9.16, + "learning_rate": 1.9481162687433822e-05, + "loss": 2.5272, + "step": 357400 + }, + { + "epoch": 9.16, + "learning_rate": 1.9472623561157224e-05, + "loss": 2.4352, + "step": 357500 + }, + { + "epoch": 9.16, + "learning_rate": 1.9464084434880625e-05, + "loss": 2.4541, + "step": 357600 + }, + { + "epoch": 9.16, + "learning_rate": 1.9455545308604027e-05, + "loss": 2.2915, + "step": 357700 + }, + { + "epoch": 9.17, + "learning_rate": 1.9447006182327425e-05, + "loss": 2.5464, + "step": 357800 + }, + { + "epoch": 9.17, + "learning_rate": 1.9438467056050826e-05, + "loss": 2.3428, + "step": 357900 + }, + { + "epoch": 9.17, + "learning_rate": 1.9429927929774228e-05, + "loss": 2.3803, + "step": 358000 + }, + { + "epoch": 9.17, + "learning_rate": 1.942138880349763e-05, + "loss": 2.4407, + "step": 358100 + }, + { + "epoch": 9.18, + "learning_rate": 1.9412849677221028e-05, + "loss": 2.4178, + "step": 358200 + }, + { + "epoch": 9.18, + "learning_rate": 1.940431055094443e-05, + "loss": 2.5146, + "step": 358300 + }, + { + "epoch": 9.18, + "learning_rate": 1.939577142466783e-05, + "loss": 2.4201, + "step": 358400 + }, + { + "epoch": 9.18, + "learning_rate": 1.9387232298391232e-05, + "loss": 2.5639, + "step": 358500 + }, + { + "epoch": 9.19, + "learning_rate": 1.937869317211463e-05, + "loss": 2.4283, + "step": 358600 + }, + { + "epoch": 9.19, + "learning_rate": 1.9370154045838032e-05, + "loss": 2.4783, + "step": 358700 + }, + { + "epoch": 9.19, + "learning_rate": 1.9361614919561434e-05, + "loss": 2.5163, + "step": 358800 + }, + { + "epoch": 9.19, + "learning_rate": 1.9353075793284832e-05, + "loss": 2.3794, + "step": 358900 + }, + { + "epoch": 9.2, + "learning_rate": 1.9344536667008233e-05, + "loss": 2.6358, + "step": 359000 + }, + { + "epoch": 9.2, + "learning_rate": 1.9335997540731635e-05, + "loss": 2.5169, + "step": 359100 + }, + { + "epoch": 9.2, + "learning_rate": 1.9327458414455036e-05, + "loss": 2.3942, + "step": 359200 + }, + { + "epoch": 9.2, + "learning_rate": 1.9318919288178435e-05, + "loss": 2.5266, + "step": 359300 + }, + { + "epoch": 9.21, + "learning_rate": 1.9310380161901833e-05, + "loss": 2.4013, + "step": 359400 + }, + { + "epoch": 9.21, + "learning_rate": 1.9301841035625234e-05, + "loss": 2.4278, + "step": 359500 + }, + { + "epoch": 9.21, + "learning_rate": 1.9293301909348636e-05, + "loss": 2.3956, + "step": 359600 + }, + { + "epoch": 9.21, + "learning_rate": 1.9284762783072034e-05, + "loss": 2.4784, + "step": 359700 + }, + { + "epoch": 9.22, + "learning_rate": 1.9276223656795436e-05, + "loss": 2.4385, + "step": 359800 + }, + { + "epoch": 9.22, + "learning_rate": 1.9267684530518837e-05, + "loss": 2.4665, + "step": 359900 + }, + { + "epoch": 9.22, + "learning_rate": 1.925914540424224e-05, + "loss": 2.369, + "step": 360000 + }, + { + "epoch": 9.22, + "learning_rate": 1.9250606277965637e-05, + "loss": 2.4647, + "step": 360100 + }, + { + "epoch": 9.23, + "learning_rate": 1.924206715168904e-05, + "loss": 2.4181, + "step": 360200 + }, + { + "epoch": 9.23, + "learning_rate": 1.923352802541244e-05, + "loss": 2.3927, + "step": 360300 + }, + { + "epoch": 9.23, + "learning_rate": 1.922498889913584e-05, + "loss": 2.4444, + "step": 360400 + }, + { + "epoch": 9.24, + "learning_rate": 1.921644977285924e-05, + "loss": 2.6982, + "step": 360500 + }, + { + "epoch": 9.24, + "learning_rate": 1.920791064658264e-05, + "loss": 2.6013, + "step": 360600 + }, + { + "epoch": 9.24, + "learning_rate": 1.9199371520306043e-05, + "loss": 2.4856, + "step": 360700 + }, + { + "epoch": 9.24, + "learning_rate": 1.9190832394029444e-05, + "loss": 2.5226, + "step": 360800 + }, + { + "epoch": 9.25, + "learning_rate": 1.9182293267752842e-05, + "loss": 2.4428, + "step": 360900 + }, + { + "epoch": 9.25, + "learning_rate": 1.9173754141476244e-05, + "loss": 2.5025, + "step": 361000 + }, + { + "epoch": 9.25, + "learning_rate": 1.9165215015199646e-05, + "loss": 2.4364, + "step": 361100 + }, + { + "epoch": 9.25, + "learning_rate": 1.9156675888923044e-05, + "loss": 2.3382, + "step": 361200 + }, + { + "epoch": 9.26, + "learning_rate": 1.9148136762646445e-05, + "loss": 2.3355, + "step": 361300 + }, + { + "epoch": 9.26, + "learning_rate": 1.9139597636369847e-05, + "loss": 2.3755, + "step": 361400 + }, + { + "epoch": 9.26, + "learning_rate": 1.913105851009325e-05, + "loss": 2.5798, + "step": 361500 + }, + { + "epoch": 9.26, + "learning_rate": 1.9122519383816646e-05, + "loss": 2.5586, + "step": 361600 + }, + { + "epoch": 9.27, + "learning_rate": 1.9113980257540048e-05, + "loss": 2.5779, + "step": 361700 + }, + { + "epoch": 9.27, + "learning_rate": 1.910544113126345e-05, + "loss": 2.4358, + "step": 361800 + }, + { + "epoch": 9.27, + "learning_rate": 1.909690200498685e-05, + "loss": 2.4411, + "step": 361900 + }, + { + "epoch": 9.27, + "learning_rate": 1.908836287871025e-05, + "loss": 2.556, + "step": 362000 + }, + { + "epoch": 9.28, + "learning_rate": 1.907982375243365e-05, + "loss": 2.4819, + "step": 362100 + }, + { + "epoch": 9.28, + "learning_rate": 1.9071284626157052e-05, + "loss": 2.3784, + "step": 362200 + }, + { + "epoch": 9.28, + "learning_rate": 1.906274549988045e-05, + "loss": 2.2813, + "step": 362300 + }, + { + "epoch": 9.28, + "learning_rate": 1.9054206373603852e-05, + "loss": 2.3302, + "step": 362400 + }, + { + "epoch": 9.29, + "learning_rate": 1.9045667247327254e-05, + "loss": 2.5037, + "step": 362500 + }, + { + "epoch": 9.29, + "learning_rate": 1.9037128121050655e-05, + "loss": 2.5283, + "step": 362600 + }, + { + "epoch": 9.29, + "learning_rate": 1.9028588994774053e-05, + "loss": 2.3458, + "step": 362700 + }, + { + "epoch": 9.29, + "learning_rate": 1.9020049868497455e-05, + "loss": 2.4362, + "step": 362800 + }, + { + "epoch": 9.3, + "learning_rate": 1.9011510742220856e-05, + "loss": 2.5364, + "step": 362900 + }, + { + "epoch": 9.3, + "learning_rate": 1.9002971615944258e-05, + "loss": 2.3834, + "step": 363000 + }, + { + "epoch": 9.3, + "learning_rate": 1.8994432489667656e-05, + "loss": 2.4197, + "step": 363100 + }, + { + "epoch": 9.3, + "learning_rate": 1.8985893363391058e-05, + "loss": 2.4561, + "step": 363200 + }, + { + "epoch": 9.31, + "learning_rate": 1.897735423711446e-05, + "loss": 2.4097, + "step": 363300 + }, + { + "epoch": 9.31, + "learning_rate": 1.896881511083786e-05, + "loss": 2.4191, + "step": 363400 + }, + { + "epoch": 9.31, + "learning_rate": 1.896027598456126e-05, + "loss": 2.3627, + "step": 363500 + }, + { + "epoch": 9.31, + "learning_rate": 1.895173685828466e-05, + "loss": 2.5534, + "step": 363600 + }, + { + "epoch": 9.32, + "learning_rate": 1.8943197732008062e-05, + "loss": 2.3401, + "step": 363700 + }, + { + "epoch": 9.32, + "learning_rate": 1.893465860573146e-05, + "loss": 2.5124, + "step": 363800 + }, + { + "epoch": 9.32, + "learning_rate": 1.8926119479454862e-05, + "loss": 2.5377, + "step": 363900 + }, + { + "epoch": 9.32, + "learning_rate": 1.8917580353178263e-05, + "loss": 2.4968, + "step": 364000 + }, + { + "epoch": 9.33, + "learning_rate": 1.8909041226901665e-05, + "loss": 2.5767, + "step": 364100 + }, + { + "epoch": 9.33, + "learning_rate": 1.8900502100625063e-05, + "loss": 2.46, + "step": 364200 + }, + { + "epoch": 9.33, + "learning_rate": 1.8891962974348465e-05, + "loss": 2.2387, + "step": 364300 + }, + { + "epoch": 9.33, + "learning_rate": 1.8883423848071866e-05, + "loss": 2.4068, + "step": 364400 + }, + { + "epoch": 9.34, + "learning_rate": 1.8874884721795268e-05, + "loss": 2.5661, + "step": 364500 + }, + { + "epoch": 9.34, + "learning_rate": 1.8866345595518666e-05, + "loss": 2.5201, + "step": 364600 + }, + { + "epoch": 9.34, + "learning_rate": 1.8857806469242067e-05, + "loss": 2.4884, + "step": 364700 + }, + { + "epoch": 9.35, + "learning_rate": 1.884926734296547e-05, + "loss": 2.6423, + "step": 364800 + }, + { + "epoch": 9.35, + "learning_rate": 1.884072821668887e-05, + "loss": 2.5374, + "step": 364900 + }, + { + "epoch": 9.35, + "learning_rate": 1.883218909041227e-05, + "loss": 2.3762, + "step": 365000 + }, + { + "epoch": 9.35, + "learning_rate": 1.882364996413567e-05, + "loss": 2.4098, + "step": 365100 + }, + { + "epoch": 9.36, + "learning_rate": 1.8815110837859072e-05, + "loss": 2.5898, + "step": 365200 + }, + { + "epoch": 9.36, + "learning_rate": 1.880657171158247e-05, + "loss": 2.3937, + "step": 365300 + }, + { + "epoch": 9.36, + "learning_rate": 1.879803258530587e-05, + "loss": 2.5132, + "step": 365400 + }, + { + "epoch": 9.36, + "learning_rate": 1.8789493459029273e-05, + "loss": 2.3442, + "step": 365500 + }, + { + "epoch": 9.37, + "learning_rate": 1.8780954332752675e-05, + "loss": 2.4829, + "step": 365600 + }, + { + "epoch": 9.37, + "learning_rate": 1.8772415206476073e-05, + "loss": 2.5117, + "step": 365700 + }, + { + "epoch": 9.37, + "learning_rate": 1.8763876080199474e-05, + "loss": 2.4341, + "step": 365800 + }, + { + "epoch": 9.37, + "learning_rate": 1.8755336953922876e-05, + "loss": 2.66, + "step": 365900 + }, + { + "epoch": 9.38, + "learning_rate": 1.8746797827646277e-05, + "loss": 2.3417, + "step": 366000 + }, + { + "epoch": 9.38, + "learning_rate": 1.8738258701369675e-05, + "loss": 2.4579, + "step": 366100 + }, + { + "epoch": 9.38, + "learning_rate": 1.8729719575093077e-05, + "loss": 2.5, + "step": 366200 + }, + { + "epoch": 9.38, + "learning_rate": 1.872118044881648e-05, + "loss": 2.3192, + "step": 366300 + }, + { + "epoch": 9.39, + "learning_rate": 1.8712641322539877e-05, + "loss": 2.5531, + "step": 366400 + }, + { + "epoch": 9.39, + "learning_rate": 1.8704102196263278e-05, + "loss": 2.5078, + "step": 366500 + }, + { + "epoch": 9.39, + "learning_rate": 1.869556306998668e-05, + "loss": 2.3509, + "step": 366600 + }, + { + "epoch": 9.39, + "learning_rate": 1.868702394371008e-05, + "loss": 2.4027, + "step": 366700 + }, + { + "epoch": 9.4, + "learning_rate": 1.867848481743348e-05, + "loss": 2.2721, + "step": 366800 + }, + { + "epoch": 9.4, + "learning_rate": 1.866994569115688e-05, + "loss": 2.3922, + "step": 366900 + }, + { + "epoch": 9.4, + "learning_rate": 1.8661406564880283e-05, + "loss": 2.4485, + "step": 367000 + }, + { + "epoch": 9.4, + "learning_rate": 1.8652867438603684e-05, + "loss": 2.5269, + "step": 367100 + }, + { + "epoch": 9.41, + "learning_rate": 1.8644328312327082e-05, + "loss": 2.4858, + "step": 367200 + }, + { + "epoch": 9.41, + "learning_rate": 1.8635789186050484e-05, + "loss": 2.5382, + "step": 367300 + }, + { + "epoch": 9.41, + "learning_rate": 1.8627250059773885e-05, + "loss": 2.3414, + "step": 367400 + }, + { + "epoch": 9.41, + "learning_rate": 1.8618710933497287e-05, + "loss": 2.4237, + "step": 367500 + }, + { + "epoch": 9.42, + "learning_rate": 1.8610171807220685e-05, + "loss": 2.6329, + "step": 367600 + }, + { + "epoch": 9.42, + "learning_rate": 1.8601632680944087e-05, + "loss": 2.3772, + "step": 367700 + }, + { + "epoch": 9.42, + "learning_rate": 1.8593093554667488e-05, + "loss": 2.3128, + "step": 367800 + }, + { + "epoch": 9.42, + "learning_rate": 1.8584554428390886e-05, + "loss": 2.4733, + "step": 367900 + }, + { + "epoch": 9.43, + "learning_rate": 1.8576015302114288e-05, + "loss": 2.4864, + "step": 368000 + }, + { + "epoch": 9.43, + "learning_rate": 1.856747617583769e-05, + "loss": 2.6485, + "step": 368100 + }, + { + "epoch": 9.43, + "learning_rate": 1.855893704956109e-05, + "loss": 2.4724, + "step": 368200 + }, + { + "epoch": 9.43, + "learning_rate": 1.855039792328449e-05, + "loss": 2.4789, + "step": 368300 + }, + { + "epoch": 9.44, + "learning_rate": 1.854185879700789e-05, + "loss": 2.5142, + "step": 368400 + }, + { + "epoch": 9.44, + "learning_rate": 1.8533319670731292e-05, + "loss": 2.5463, + "step": 368500 + }, + { + "epoch": 9.44, + "learning_rate": 1.8524780544454694e-05, + "loss": 2.3217, + "step": 368600 + }, + { + "epoch": 9.45, + "learning_rate": 1.8516241418178092e-05, + "loss": 2.384, + "step": 368700 + }, + { + "epoch": 9.45, + "learning_rate": 1.8507702291901494e-05, + "loss": 2.4266, + "step": 368800 + }, + { + "epoch": 9.45, + "learning_rate": 1.8499163165624895e-05, + "loss": 2.4248, + "step": 368900 + }, + { + "epoch": 9.45, + "learning_rate": 1.8490624039348293e-05, + "loss": 2.5011, + "step": 369000 + }, + { + "epoch": 9.46, + "learning_rate": 1.8482084913071695e-05, + "loss": 2.3596, + "step": 369100 + }, + { + "epoch": 9.46, + "learning_rate": 1.8473545786795096e-05, + "loss": 2.5138, + "step": 369200 + }, + { + "epoch": 9.46, + "learning_rate": 1.8465006660518498e-05, + "loss": 2.5326, + "step": 369300 + }, + { + "epoch": 9.46, + "learning_rate": 1.8456467534241896e-05, + "loss": 2.7676, + "step": 369400 + }, + { + "epoch": 9.47, + "learning_rate": 1.8447928407965298e-05, + "loss": 2.5415, + "step": 369500 + }, + { + "epoch": 9.47, + "learning_rate": 1.84393892816887e-05, + "loss": 2.4893, + "step": 369600 + }, + { + "epoch": 9.47, + "learning_rate": 1.84308501554121e-05, + "loss": 2.4449, + "step": 369700 + }, + { + "epoch": 9.47, + "learning_rate": 1.84223110291355e-05, + "loss": 2.438, + "step": 369800 + }, + { + "epoch": 9.48, + "learning_rate": 1.84137719028589e-05, + "loss": 2.5547, + "step": 369900 + }, + { + "epoch": 9.48, + "learning_rate": 1.8405232776582302e-05, + "loss": 2.5122, + "step": 370000 + }, + { + "epoch": 9.48, + "learning_rate": 1.8396693650305704e-05, + "loss": 2.403, + "step": 370100 + }, + { + "epoch": 9.48, + "learning_rate": 1.83881545240291e-05, + "loss": 2.5822, + "step": 370200 + }, + { + "epoch": 9.49, + "learning_rate": 1.8379615397752503e-05, + "loss": 2.2563, + "step": 370300 + }, + { + "epoch": 9.49, + "learning_rate": 1.8371076271475905e-05, + "loss": 2.4158, + "step": 370400 + }, + { + "epoch": 9.49, + "learning_rate": 1.8362537145199303e-05, + "loss": 2.4103, + "step": 370500 + }, + { + "epoch": 9.49, + "learning_rate": 1.8353998018922704e-05, + "loss": 2.3934, + "step": 370600 + }, + { + "epoch": 9.5, + "learning_rate": 1.8345458892646106e-05, + "loss": 2.6031, + "step": 370700 + }, + { + "epoch": 9.5, + "learning_rate": 1.8336919766369508e-05, + "loss": 2.5486, + "step": 370800 + }, + { + "epoch": 9.5, + "learning_rate": 1.8328380640092906e-05, + "loss": 2.3811, + "step": 370900 + }, + { + "epoch": 9.5, + "learning_rate": 1.8319841513816307e-05, + "loss": 2.5578, + "step": 371000 + }, + { + "epoch": 9.51, + "learning_rate": 1.831130238753971e-05, + "loss": 2.3826, + "step": 371100 + }, + { + "epoch": 9.51, + "learning_rate": 1.830276326126311e-05, + "loss": 2.5775, + "step": 371200 + }, + { + "epoch": 9.51, + "learning_rate": 1.829422413498651e-05, + "loss": 2.6319, + "step": 371300 + }, + { + "epoch": 9.51, + "learning_rate": 1.828568500870991e-05, + "loss": 2.2904, + "step": 371400 + }, + { + "epoch": 9.52, + "learning_rate": 1.827714588243331e-05, + "loss": 2.4549, + "step": 371500 + }, + { + "epoch": 9.52, + "learning_rate": 1.826860675615671e-05, + "loss": 2.612, + "step": 371600 + }, + { + "epoch": 9.52, + "learning_rate": 1.826006762988011e-05, + "loss": 2.444, + "step": 371700 + }, + { + "epoch": 9.52, + "learning_rate": 1.8251528503603513e-05, + "loss": 2.4744, + "step": 371800 + }, + { + "epoch": 9.53, + "learning_rate": 1.8242989377326914e-05, + "loss": 2.5093, + "step": 371900 + }, + { + "epoch": 9.53, + "learning_rate": 1.8234450251050313e-05, + "loss": 2.3673, + "step": 372000 + }, + { + "epoch": 9.53, + "learning_rate": 1.8225911124773714e-05, + "loss": 2.3785, + "step": 372100 + }, + { + "epoch": 9.53, + "learning_rate": 1.8217371998497116e-05, + "loss": 2.5666, + "step": 372200 + }, + { + "epoch": 9.54, + "learning_rate": 1.8208832872220517e-05, + "loss": 2.251, + "step": 372300 + }, + { + "epoch": 9.54, + "learning_rate": 1.8200293745943915e-05, + "loss": 2.5145, + "step": 372400 + }, + { + "epoch": 9.54, + "learning_rate": 1.8191754619667317e-05, + "loss": 2.4607, + "step": 372500 + }, + { + "epoch": 9.55, + "learning_rate": 1.818321549339072e-05, + "loss": 2.5412, + "step": 372600 + }, + { + "epoch": 9.55, + "learning_rate": 1.817467636711412e-05, + "loss": 2.4565, + "step": 372700 + }, + { + "epoch": 9.55, + "learning_rate": 1.8166137240837518e-05, + "loss": 2.4761, + "step": 372800 + }, + { + "epoch": 9.55, + "learning_rate": 1.815759811456092e-05, + "loss": 2.4169, + "step": 372900 + }, + { + "epoch": 9.56, + "learning_rate": 1.814905898828432e-05, + "loss": 2.298, + "step": 373000 + }, + { + "epoch": 9.56, + "learning_rate": 1.814051986200772e-05, + "loss": 2.4365, + "step": 373100 + }, + { + "epoch": 9.56, + "learning_rate": 1.813198073573112e-05, + "loss": 2.5362, + "step": 373200 + }, + { + "epoch": 9.56, + "learning_rate": 1.8123441609454523e-05, + "loss": 2.4336, + "step": 373300 + }, + { + "epoch": 9.57, + "learning_rate": 1.8114902483177924e-05, + "loss": 2.2315, + "step": 373400 + }, + { + "epoch": 9.57, + "learning_rate": 1.8106363356901322e-05, + "loss": 2.304, + "step": 373500 + }, + { + "epoch": 9.57, + "learning_rate": 1.8097824230624724e-05, + "loss": 2.444, + "step": 373600 + }, + { + "epoch": 9.57, + "learning_rate": 1.8089285104348125e-05, + "loss": 2.4737, + "step": 373700 + }, + { + "epoch": 9.58, + "learning_rate": 1.8080745978071527e-05, + "loss": 2.4807, + "step": 373800 + }, + { + "epoch": 9.58, + "learning_rate": 1.8072206851794925e-05, + "loss": 2.4411, + "step": 373900 + }, + { + "epoch": 9.58, + "learning_rate": 1.8063667725518327e-05, + "loss": 2.4845, + "step": 374000 + }, + { + "epoch": 9.58, + "learning_rate": 1.8055128599241728e-05, + "loss": 2.5516, + "step": 374100 + }, + { + "epoch": 9.59, + "learning_rate": 1.8046589472965126e-05, + "loss": 2.4418, + "step": 374200 + }, + { + "epoch": 9.59, + "learning_rate": 1.8038050346688528e-05, + "loss": 2.4746, + "step": 374300 + }, + { + "epoch": 9.59, + "learning_rate": 1.802951122041193e-05, + "loss": 2.2424, + "step": 374400 + }, + { + "epoch": 9.59, + "learning_rate": 1.802097209413533e-05, + "loss": 2.537, + "step": 374500 + }, + { + "epoch": 9.6, + "learning_rate": 1.801243296785873e-05, + "loss": 2.4697, + "step": 374600 + }, + { + "epoch": 9.6, + "learning_rate": 1.800389384158213e-05, + "loss": 2.4984, + "step": 374700 + }, + { + "epoch": 9.6, + "learning_rate": 1.7995354715305532e-05, + "loss": 2.6161, + "step": 374800 + }, + { + "epoch": 9.6, + "learning_rate": 1.7986815589028934e-05, + "loss": 2.4842, + "step": 374900 + }, + { + "epoch": 9.61, + "learning_rate": 1.7978276462752332e-05, + "loss": 2.2586, + "step": 375000 + }, + { + "epoch": 9.61, + "learning_rate": 1.7969737336475734e-05, + "loss": 2.5688, + "step": 375100 + }, + { + "epoch": 9.61, + "learning_rate": 1.7961198210199135e-05, + "loss": 2.3439, + "step": 375200 + }, + { + "epoch": 9.61, + "learning_rate": 1.7952659083922537e-05, + "loss": 2.4299, + "step": 375300 + }, + { + "epoch": 9.62, + "learning_rate": 1.7944119957645935e-05, + "loss": 2.6774, + "step": 375400 + }, + { + "epoch": 9.62, + "learning_rate": 1.7935580831369336e-05, + "loss": 2.4918, + "step": 375500 + }, + { + "epoch": 9.62, + "learning_rate": 1.7927041705092738e-05, + "loss": 2.3825, + "step": 375600 + }, + { + "epoch": 9.62, + "learning_rate": 1.7918502578816136e-05, + "loss": 2.4744, + "step": 375700 + }, + { + "epoch": 9.63, + "learning_rate": 1.7909963452539538e-05, + "loss": 2.4258, + "step": 375800 + }, + { + "epoch": 9.63, + "learning_rate": 1.790142432626294e-05, + "loss": 2.5512, + "step": 375900 + }, + { + "epoch": 9.63, + "learning_rate": 1.789288519998634e-05, + "loss": 2.3063, + "step": 376000 + }, + { + "epoch": 9.63, + "learning_rate": 1.788434607370974e-05, + "loss": 2.4228, + "step": 376100 + }, + { + "epoch": 9.64, + "learning_rate": 1.787580694743314e-05, + "loss": 2.409, + "step": 376200 + }, + { + "epoch": 9.64, + "learning_rate": 1.7867267821156542e-05, + "loss": 2.4866, + "step": 376300 + }, + { + "epoch": 9.64, + "learning_rate": 1.7858728694879943e-05, + "loss": 2.4487, + "step": 376400 + }, + { + "epoch": 9.64, + "learning_rate": 1.785018956860334e-05, + "loss": 2.3571, + "step": 376500 + }, + { + "epoch": 9.65, + "learning_rate": 1.7841650442326743e-05, + "loss": 2.6049, + "step": 376600 + }, + { + "epoch": 9.65, + "learning_rate": 1.7833111316050145e-05, + "loss": 2.4646, + "step": 376700 + }, + { + "epoch": 9.65, + "learning_rate": 1.7824572189773546e-05, + "loss": 2.3863, + "step": 376800 + }, + { + "epoch": 9.66, + "learning_rate": 1.7816033063496944e-05, + "loss": 2.4522, + "step": 376900 + }, + { + "epoch": 9.66, + "learning_rate": 1.7807493937220343e-05, + "loss": 2.4233, + "step": 377000 + }, + { + "epoch": 9.66, + "learning_rate": 1.7798954810943744e-05, + "loss": 2.5585, + "step": 377100 + }, + { + "epoch": 9.66, + "learning_rate": 1.7790415684667146e-05, + "loss": 2.6039, + "step": 377200 + }, + { + "epoch": 9.67, + "learning_rate": 1.7781876558390544e-05, + "loss": 2.502, + "step": 377300 + }, + { + "epoch": 9.67, + "learning_rate": 1.7773337432113945e-05, + "loss": 2.5336, + "step": 377400 + }, + { + "epoch": 9.67, + "learning_rate": 1.7764798305837347e-05, + "loss": 2.4525, + "step": 377500 + }, + { + "epoch": 9.67, + "learning_rate": 1.775625917956075e-05, + "loss": 2.5984, + "step": 377600 + }, + { + "epoch": 9.68, + "learning_rate": 1.7747720053284147e-05, + "loss": 2.4445, + "step": 377700 + }, + { + "epoch": 9.68, + "learning_rate": 1.7739180927007548e-05, + "loss": 2.2989, + "step": 377800 + }, + { + "epoch": 9.68, + "learning_rate": 1.773064180073095e-05, + "loss": 2.4097, + "step": 377900 + }, + { + "epoch": 9.68, + "learning_rate": 1.7722102674454348e-05, + "loss": 2.4614, + "step": 378000 + }, + { + "epoch": 9.69, + "learning_rate": 1.771356354817775e-05, + "loss": 2.4558, + "step": 378100 + }, + { + "epoch": 9.69, + "learning_rate": 1.770502442190115e-05, + "loss": 2.4049, + "step": 378200 + }, + { + "epoch": 9.69, + "learning_rate": 1.7696485295624553e-05, + "loss": 2.5365, + "step": 378300 + }, + { + "epoch": 9.69, + "learning_rate": 1.768794616934795e-05, + "loss": 2.5574, + "step": 378400 + }, + { + "epoch": 9.7, + "learning_rate": 1.7679407043071352e-05, + "loss": 2.5316, + "step": 378500 + }, + { + "epoch": 9.7, + "learning_rate": 1.7670867916794754e-05, + "loss": 2.5792, + "step": 378600 + }, + { + "epoch": 9.7, + "learning_rate": 1.7662328790518155e-05, + "loss": 2.372, + "step": 378700 + }, + { + "epoch": 9.7, + "learning_rate": 1.7653789664241554e-05, + "loss": 2.5235, + "step": 378800 + }, + { + "epoch": 9.71, + "learning_rate": 1.7645250537964955e-05, + "loss": 2.5767, + "step": 378900 + }, + { + "epoch": 9.71, + "learning_rate": 1.7636711411688357e-05, + "loss": 2.4736, + "step": 379000 + }, + { + "epoch": 9.71, + "learning_rate": 1.7628172285411755e-05, + "loss": 2.4114, + "step": 379100 + }, + { + "epoch": 9.71, + "learning_rate": 1.7619633159135156e-05, + "loss": 2.3932, + "step": 379200 + }, + { + "epoch": 9.72, + "learning_rate": 1.7611094032858558e-05, + "loss": 2.5267, + "step": 379300 + }, + { + "epoch": 9.72, + "learning_rate": 1.760255490658196e-05, + "loss": 2.5374, + "step": 379400 + }, + { + "epoch": 9.72, + "learning_rate": 1.7594015780305358e-05, + "loss": 2.4665, + "step": 379500 + }, + { + "epoch": 9.72, + "learning_rate": 1.758547665402876e-05, + "loss": 2.491, + "step": 379600 + }, + { + "epoch": 9.73, + "learning_rate": 1.757693752775216e-05, + "loss": 2.5024, + "step": 379700 + }, + { + "epoch": 9.73, + "learning_rate": 1.7568398401475562e-05, + "loss": 2.5219, + "step": 379800 + }, + { + "epoch": 9.73, + "learning_rate": 1.755985927519896e-05, + "loss": 2.4444, + "step": 379900 + }, + { + "epoch": 9.73, + "learning_rate": 1.7551320148922362e-05, + "loss": 2.4486, + "step": 380000 + }, + { + "epoch": 9.74, + "learning_rate": 1.7542781022645763e-05, + "loss": 2.4555, + "step": 380100 + }, + { + "epoch": 9.74, + "learning_rate": 1.7534241896369165e-05, + "loss": 2.4537, + "step": 380200 + }, + { + "epoch": 9.74, + "learning_rate": 1.7525702770092563e-05, + "loss": 2.4673, + "step": 380300 + }, + { + "epoch": 9.74, + "learning_rate": 1.7517163643815965e-05, + "loss": 2.5531, + "step": 380400 + }, + { + "epoch": 9.75, + "learning_rate": 1.7508624517539366e-05, + "loss": 2.5371, + "step": 380500 + }, + { + "epoch": 9.75, + "learning_rate": 1.7500085391262764e-05, + "loss": 2.692, + "step": 380600 + }, + { + "epoch": 9.75, + "learning_rate": 1.7491546264986166e-05, + "loss": 2.2664, + "step": 380700 + }, + { + "epoch": 9.76, + "learning_rate": 1.7483007138709568e-05, + "loss": 2.4715, + "step": 380800 + }, + { + "epoch": 9.76, + "learning_rate": 1.747446801243297e-05, + "loss": 2.4072, + "step": 380900 + }, + { + "epoch": 9.76, + "learning_rate": 1.7465928886156367e-05, + "loss": 2.3953, + "step": 381000 + }, + { + "epoch": 9.76, + "learning_rate": 1.745738975987977e-05, + "loss": 2.3845, + "step": 381100 + }, + { + "epoch": 9.77, + "learning_rate": 1.744885063360317e-05, + "loss": 2.4376, + "step": 381200 + }, + { + "epoch": 9.77, + "learning_rate": 1.7440311507326572e-05, + "loss": 2.5648, + "step": 381300 + }, + { + "epoch": 9.77, + "learning_rate": 1.743177238104997e-05, + "loss": 2.4225, + "step": 381400 + }, + { + "epoch": 9.77, + "learning_rate": 1.742323325477337e-05, + "loss": 2.2958, + "step": 381500 + }, + { + "epoch": 9.78, + "learning_rate": 1.7414694128496773e-05, + "loss": 2.5642, + "step": 381600 + }, + { + "epoch": 9.78, + "learning_rate": 1.7406155002220175e-05, + "loss": 2.4446, + "step": 381700 + }, + { + "epoch": 9.78, + "learning_rate": 1.7397615875943573e-05, + "loss": 2.4259, + "step": 381800 + }, + { + "epoch": 9.78, + "learning_rate": 1.7389076749666974e-05, + "loss": 2.4643, + "step": 381900 + }, + { + "epoch": 9.79, + "learning_rate": 1.7380537623390376e-05, + "loss": 2.5778, + "step": 382000 + }, + { + "epoch": 9.79, + "learning_rate": 1.7371998497113774e-05, + "loss": 2.3056, + "step": 382100 + }, + { + "epoch": 9.79, + "learning_rate": 1.7363459370837176e-05, + "loss": 2.4528, + "step": 382200 + }, + { + "epoch": 9.79, + "learning_rate": 1.7354920244560577e-05, + "loss": 2.5293, + "step": 382300 + }, + { + "epoch": 9.8, + "learning_rate": 1.734638111828398e-05, + "loss": 2.552, + "step": 382400 + }, + { + "epoch": 9.8, + "learning_rate": 1.7337841992007377e-05, + "loss": 2.4343, + "step": 382500 + }, + { + "epoch": 9.8, + "learning_rate": 1.732930286573078e-05, + "loss": 2.4091, + "step": 382600 + }, + { + "epoch": 9.8, + "learning_rate": 1.732076373945418e-05, + "loss": 2.5018, + "step": 382700 + }, + { + "epoch": 9.81, + "learning_rate": 1.731222461317758e-05, + "loss": 2.4653, + "step": 382800 + }, + { + "epoch": 9.81, + "learning_rate": 1.730368548690098e-05, + "loss": 2.4213, + "step": 382900 + }, + { + "epoch": 9.81, + "learning_rate": 1.729514636062438e-05, + "loss": 2.5149, + "step": 383000 + }, + { + "epoch": 9.81, + "learning_rate": 1.7286607234347783e-05, + "loss": 2.3389, + "step": 383100 + }, + { + "epoch": 9.82, + "learning_rate": 1.727806810807118e-05, + "loss": 2.2932, + "step": 383200 + }, + { + "epoch": 9.82, + "learning_rate": 1.7269528981794583e-05, + "loss": 2.425, + "step": 383300 + }, + { + "epoch": 9.82, + "learning_rate": 1.7260989855517984e-05, + "loss": 2.5606, + "step": 383400 + }, + { + "epoch": 9.82, + "learning_rate": 1.7252450729241386e-05, + "loss": 2.2893, + "step": 383500 + }, + { + "epoch": 9.83, + "learning_rate": 1.7243911602964784e-05, + "loss": 2.6422, + "step": 383600 + }, + { + "epoch": 9.83, + "learning_rate": 1.7235372476688185e-05, + "loss": 2.4667, + "step": 383700 + }, + { + "epoch": 9.83, + "learning_rate": 1.7226833350411587e-05, + "loss": 2.4505, + "step": 383800 + }, + { + "epoch": 9.83, + "learning_rate": 1.721829422413499e-05, + "loss": 2.4373, + "step": 383900 + }, + { + "epoch": 9.84, + "learning_rate": 1.7209755097858387e-05, + "loss": 2.5523, + "step": 384000 + }, + { + "epoch": 9.84, + "learning_rate": 1.7201215971581788e-05, + "loss": 2.5825, + "step": 384100 + }, + { + "epoch": 9.84, + "learning_rate": 1.719267684530519e-05, + "loss": 2.2537, + "step": 384200 + }, + { + "epoch": 9.84, + "learning_rate": 1.718413771902859e-05, + "loss": 2.592, + "step": 384300 + }, + { + "epoch": 9.85, + "learning_rate": 1.717559859275199e-05, + "loss": 2.391, + "step": 384400 + }, + { + "epoch": 9.85, + "learning_rate": 1.716705946647539e-05, + "loss": 2.5364, + "step": 384500 + }, + { + "epoch": 9.85, + "learning_rate": 1.7158520340198792e-05, + "loss": 2.3426, + "step": 384600 + }, + { + "epoch": 9.86, + "learning_rate": 1.714998121392219e-05, + "loss": 2.6583, + "step": 384700 + }, + { + "epoch": 9.86, + "learning_rate": 1.7141442087645592e-05, + "loss": 2.4238, + "step": 384800 + }, + { + "epoch": 9.86, + "learning_rate": 1.7132902961368994e-05, + "loss": 2.2801, + "step": 384900 + }, + { + "epoch": 9.86, + "learning_rate": 1.7124363835092395e-05, + "loss": 2.495, + "step": 385000 + }, + { + "epoch": 9.87, + "learning_rate": 1.7115824708815793e-05, + "loss": 2.4237, + "step": 385100 + }, + { + "epoch": 9.87, + "learning_rate": 1.7107285582539195e-05, + "loss": 2.4236, + "step": 385200 + }, + { + "epoch": 9.87, + "learning_rate": 1.7098746456262597e-05, + "loss": 2.2743, + "step": 385300 + }, + { + "epoch": 9.87, + "learning_rate": 1.7090207329985998e-05, + "loss": 2.4369, + "step": 385400 + }, + { + "epoch": 9.88, + "learning_rate": 1.7081668203709396e-05, + "loss": 2.568, + "step": 385500 + }, + { + "epoch": 9.88, + "learning_rate": 1.7073129077432798e-05, + "loss": 2.3912, + "step": 385600 + }, + { + "epoch": 9.88, + "learning_rate": 1.70645899511562e-05, + "loss": 2.4135, + "step": 385700 + }, + { + "epoch": 9.88, + "learning_rate": 1.7056050824879598e-05, + "loss": 2.3469, + "step": 385800 + }, + { + "epoch": 9.89, + "learning_rate": 1.7047511698603e-05, + "loss": 2.5563, + "step": 385900 + }, + { + "epoch": 9.89, + "learning_rate": 1.70389725723264e-05, + "loss": 2.456, + "step": 386000 + }, + { + "epoch": 9.89, + "learning_rate": 1.7030433446049802e-05, + "loss": 2.4716, + "step": 386100 + }, + { + "epoch": 9.89, + "learning_rate": 1.70218943197732e-05, + "loss": 2.4082, + "step": 386200 + }, + { + "epoch": 9.9, + "learning_rate": 1.7013355193496602e-05, + "loss": 2.5351, + "step": 386300 + }, + { + "epoch": 9.9, + "learning_rate": 1.7004816067220003e-05, + "loss": 2.4178, + "step": 386400 + }, + { + "epoch": 9.9, + "learning_rate": 1.6996276940943405e-05, + "loss": 2.474, + "step": 386500 + }, + { + "epoch": 9.9, + "learning_rate": 1.6987737814666803e-05, + "loss": 2.4404, + "step": 386600 + }, + { + "epoch": 9.91, + "learning_rate": 1.6979198688390205e-05, + "loss": 2.4461, + "step": 386700 + }, + { + "epoch": 9.91, + "learning_rate": 1.6970659562113606e-05, + "loss": 2.5338, + "step": 386800 + }, + { + "epoch": 9.91, + "learning_rate": 1.6962120435837008e-05, + "loss": 2.4256, + "step": 386900 + }, + { + "epoch": 9.91, + "learning_rate": 1.6953581309560406e-05, + "loss": 2.5794, + "step": 387000 + }, + { + "epoch": 9.92, + "learning_rate": 1.6945042183283807e-05, + "loss": 2.3708, + "step": 387100 + }, + { + "epoch": 9.92, + "learning_rate": 1.693650305700721e-05, + "loss": 2.5113, + "step": 387200 + }, + { + "epoch": 9.92, + "learning_rate": 1.6927963930730607e-05, + "loss": 2.3322, + "step": 387300 + }, + { + "epoch": 9.92, + "learning_rate": 1.691942480445401e-05, + "loss": 2.4048, + "step": 387400 + }, + { + "epoch": 9.93, + "learning_rate": 1.691088567817741e-05, + "loss": 2.4437, + "step": 387500 + }, + { + "epoch": 9.93, + "learning_rate": 1.6902346551900812e-05, + "loss": 2.4859, + "step": 387600 + }, + { + "epoch": 9.93, + "learning_rate": 1.689380742562421e-05, + "loss": 2.516, + "step": 387700 + }, + { + "epoch": 9.93, + "learning_rate": 1.688526829934761e-05, + "loss": 2.5643, + "step": 387800 + }, + { + "epoch": 9.94, + "learning_rate": 1.6876729173071013e-05, + "loss": 2.4569, + "step": 387900 + }, + { + "epoch": 9.94, + "learning_rate": 1.6868190046794415e-05, + "loss": 2.5162, + "step": 388000 + }, + { + "epoch": 9.94, + "learning_rate": 1.6859650920517813e-05, + "loss": 2.3696, + "step": 388100 + }, + { + "epoch": 9.94, + "learning_rate": 1.6851111794241214e-05, + "loss": 2.4488, + "step": 388200 + }, + { + "epoch": 9.95, + "learning_rate": 1.6842572667964616e-05, + "loss": 2.5093, + "step": 388300 + }, + { + "epoch": 9.95, + "learning_rate": 1.6834033541688014e-05, + "loss": 2.4386, + "step": 388400 + }, + { + "epoch": 9.95, + "learning_rate": 1.6825494415411416e-05, + "loss": 2.3979, + "step": 388500 + }, + { + "epoch": 9.95, + "learning_rate": 1.6816955289134817e-05, + "loss": 2.6886, + "step": 388600 + }, + { + "epoch": 9.96, + "learning_rate": 1.680841616285822e-05, + "loss": 2.5377, + "step": 388700 + }, + { + "epoch": 9.96, + "learning_rate": 1.6799877036581617e-05, + "loss": 2.5474, + "step": 388800 + }, + { + "epoch": 9.96, + "learning_rate": 1.679133791030502e-05, + "loss": 2.562, + "step": 388900 + }, + { + "epoch": 9.97, + "learning_rate": 1.678279878402842e-05, + "loss": 2.4386, + "step": 389000 + }, + { + "epoch": 9.97, + "learning_rate": 1.677425965775182e-05, + "loss": 2.6628, + "step": 389100 + }, + { + "epoch": 9.97, + "learning_rate": 1.676572053147522e-05, + "loss": 2.381, + "step": 389200 + }, + { + "epoch": 9.97, + "learning_rate": 1.675718140519862e-05, + "loss": 2.5905, + "step": 389300 + }, + { + "epoch": 9.98, + "learning_rate": 1.6748642278922023e-05, + "loss": 2.2932, + "step": 389400 + }, + { + "epoch": 9.98, + "learning_rate": 1.6740103152645424e-05, + "loss": 2.3794, + "step": 389500 + }, + { + "epoch": 9.98, + "learning_rate": 1.6731564026368822e-05, + "loss": 2.4643, + "step": 389600 + }, + { + "epoch": 9.98, + "learning_rate": 1.6723024900092224e-05, + "loss": 2.3464, + "step": 389700 + }, + { + "epoch": 9.99, + "learning_rate": 1.6714485773815626e-05, + "loss": 2.4667, + "step": 389800 + }, + { + "epoch": 9.99, + "learning_rate": 1.6705946647539024e-05, + "loss": 2.4188, + "step": 389900 + }, + { + "epoch": 9.99, + "learning_rate": 1.6697407521262425e-05, + "loss": 2.273, + "step": 390000 + }, + { + "epoch": 9.99, + "learning_rate": 1.6688868394985827e-05, + "loss": 2.517, + "step": 390100 + }, + { + "epoch": 10.0, + "learning_rate": 1.668032926870923e-05, + "loss": 2.405, + "step": 390200 + }, + { + "epoch": 10.0, + "learning_rate": 1.6671790142432627e-05, + "loss": 2.4721, + "step": 390300 + }, + { + "epoch": 10.0, + "learning_rate": 1.6663251016156028e-05, + "loss": 2.3936, + "step": 390400 + }, + { + "epoch": 10.0, + "learning_rate": 1.665471188987943e-05, + "loss": 2.3981, + "step": 390500 + }, + { + "epoch": 10.01, + "learning_rate": 1.664617276360283e-05, + "loss": 2.5434, + "step": 390600 + }, + { + "epoch": 10.01, + "learning_rate": 1.663763363732623e-05, + "loss": 2.4544, + "step": 390700 + }, + { + "epoch": 10.01, + "learning_rate": 1.662909451104963e-05, + "loss": 2.3425, + "step": 390800 + }, + { + "epoch": 10.01, + "learning_rate": 1.6620555384773032e-05, + "loss": 2.4189, + "step": 390900 + }, + { + "epoch": 10.02, + "learning_rate": 1.661201625849643e-05, + "loss": 2.4276, + "step": 391000 + }, + { + "epoch": 10.02, + "learning_rate": 1.6603477132219832e-05, + "loss": 2.3498, + "step": 391100 + }, + { + "epoch": 10.02, + "learning_rate": 1.6594938005943234e-05, + "loss": 2.3557, + "step": 391200 + }, + { + "epoch": 10.02, + "learning_rate": 1.6586398879666635e-05, + "loss": 2.4556, + "step": 391300 + }, + { + "epoch": 10.03, + "learning_rate": 1.6577859753390033e-05, + "loss": 2.477, + "step": 391400 + }, + { + "epoch": 10.03, + "learning_rate": 1.6569320627113435e-05, + "loss": 2.3401, + "step": 391500 + }, + { + "epoch": 10.03, + "learning_rate": 1.6560781500836836e-05, + "loss": 2.5818, + "step": 391600 + }, + { + "epoch": 10.03, + "learning_rate": 1.6552242374560238e-05, + "loss": 2.3227, + "step": 391700 + }, + { + "epoch": 10.04, + "learning_rate": 1.6543703248283636e-05, + "loss": 2.5602, + "step": 391800 + }, + { + "epoch": 10.04, + "learning_rate": 1.6535164122007038e-05, + "loss": 2.6084, + "step": 391900 + }, + { + "epoch": 10.04, + "learning_rate": 1.652662499573044e-05, + "loss": 2.7121, + "step": 392000 + }, + { + "epoch": 10.04, + "learning_rate": 1.651808586945384e-05, + "loss": 2.3617, + "step": 392100 + }, + { + "epoch": 10.05, + "learning_rate": 1.650954674317724e-05, + "loss": 2.3958, + "step": 392200 + }, + { + "epoch": 10.05, + "learning_rate": 1.650100761690064e-05, + "loss": 2.3304, + "step": 392300 + }, + { + "epoch": 10.05, + "learning_rate": 1.6492468490624042e-05, + "loss": 2.2532, + "step": 392400 + }, + { + "epoch": 10.05, + "learning_rate": 1.648392936434744e-05, + "loss": 2.3591, + "step": 392500 + }, + { + "epoch": 10.06, + "learning_rate": 1.6475390238070842e-05, + "loss": 2.3205, + "step": 392600 + }, + { + "epoch": 10.06, + "learning_rate": 1.6466851111794243e-05, + "loss": 2.504, + "step": 392700 + }, + { + "epoch": 10.06, + "learning_rate": 1.6458311985517645e-05, + "loss": 2.4209, + "step": 392800 + }, + { + "epoch": 10.07, + "learning_rate": 1.6449772859241043e-05, + "loss": 2.4739, + "step": 392900 + }, + { + "epoch": 10.07, + "learning_rate": 1.6441233732964445e-05, + "loss": 2.3814, + "step": 393000 + }, + { + "epoch": 10.07, + "learning_rate": 1.6432694606687846e-05, + "loss": 2.5431, + "step": 393100 + }, + { + "epoch": 10.07, + "learning_rate": 1.6424155480411248e-05, + "loss": 2.2433, + "step": 393200 + }, + { + "epoch": 10.08, + "learning_rate": 1.6415616354134646e-05, + "loss": 2.3893, + "step": 393300 + }, + { + "epoch": 10.08, + "learning_rate": 1.6407077227858047e-05, + "loss": 2.3591, + "step": 393400 + }, + { + "epoch": 10.08, + "learning_rate": 1.639853810158145e-05, + "loss": 2.3984, + "step": 393500 + }, + { + "epoch": 10.08, + "learning_rate": 1.638999897530485e-05, + "loss": 2.3859, + "step": 393600 + }, + { + "epoch": 10.09, + "learning_rate": 1.638145984902825e-05, + "loss": 2.4652, + "step": 393700 + }, + { + "epoch": 10.09, + "learning_rate": 1.637292072275165e-05, + "loss": 2.5129, + "step": 393800 + }, + { + "epoch": 10.09, + "learning_rate": 1.6364381596475052e-05, + "loss": 2.414, + "step": 393900 + }, + { + "epoch": 10.09, + "learning_rate": 1.635584247019845e-05, + "loss": 2.4516, + "step": 394000 + }, + { + "epoch": 10.1, + "learning_rate": 1.634730334392185e-05, + "loss": 2.4073, + "step": 394100 + }, + { + "epoch": 10.1, + "learning_rate": 1.6338764217645253e-05, + "loss": 2.39, + "step": 394200 + }, + { + "epoch": 10.1, + "learning_rate": 1.6330225091368655e-05, + "loss": 2.3152, + "step": 394300 + }, + { + "epoch": 10.1, + "learning_rate": 1.6321685965092053e-05, + "loss": 2.5527, + "step": 394400 + }, + { + "epoch": 10.11, + "learning_rate": 1.631314683881545e-05, + "loss": 2.3535, + "step": 394500 + }, + { + "epoch": 10.11, + "learning_rate": 1.6304607712538852e-05, + "loss": 2.4134, + "step": 394600 + }, + { + "epoch": 10.11, + "learning_rate": 1.6296068586262254e-05, + "loss": 2.4877, + "step": 394700 + }, + { + "epoch": 10.11, + "learning_rate": 1.6287529459985652e-05, + "loss": 2.449, + "step": 394800 + }, + { + "epoch": 10.12, + "learning_rate": 1.6278990333709054e-05, + "loss": 2.4072, + "step": 394900 + }, + { + "epoch": 10.12, + "learning_rate": 1.6270451207432455e-05, + "loss": 2.4447, + "step": 395000 + }, + { + "epoch": 10.12, + "learning_rate": 1.6261912081155857e-05, + "loss": 2.3707, + "step": 395100 + }, + { + "epoch": 10.12, + "learning_rate": 1.6253372954879255e-05, + "loss": 2.3866, + "step": 395200 + }, + { + "epoch": 10.13, + "learning_rate": 1.6244833828602657e-05, + "loss": 2.4695, + "step": 395300 + }, + { + "epoch": 10.13, + "learning_rate": 1.6236294702326058e-05, + "loss": 2.3628, + "step": 395400 + }, + { + "epoch": 10.13, + "learning_rate": 1.622775557604946e-05, + "loss": 2.4668, + "step": 395500 + }, + { + "epoch": 10.13, + "learning_rate": 1.6219216449772858e-05, + "loss": 2.4534, + "step": 395600 + }, + { + "epoch": 10.14, + "learning_rate": 1.621067732349626e-05, + "loss": 2.3748, + "step": 395700 + }, + { + "epoch": 10.14, + "learning_rate": 1.620213819721966e-05, + "loss": 2.3559, + "step": 395800 + }, + { + "epoch": 10.14, + "learning_rate": 1.6193599070943062e-05, + "loss": 2.4506, + "step": 395900 + }, + { + "epoch": 10.14, + "learning_rate": 1.618505994466646e-05, + "loss": 2.1887, + "step": 396000 + }, + { + "epoch": 10.15, + "learning_rate": 1.6176520818389862e-05, + "loss": 2.3987, + "step": 396100 + }, + { + "epoch": 10.15, + "learning_rate": 1.6167981692113264e-05, + "loss": 2.2123, + "step": 396200 + }, + { + "epoch": 10.15, + "learning_rate": 1.6159442565836662e-05, + "loss": 2.4118, + "step": 396300 + }, + { + "epoch": 10.15, + "learning_rate": 1.6150903439560063e-05, + "loss": 2.4012, + "step": 396400 + }, + { + "epoch": 10.16, + "learning_rate": 1.6142364313283465e-05, + "loss": 2.4437, + "step": 396500 + }, + { + "epoch": 10.16, + "learning_rate": 1.6133825187006866e-05, + "loss": 2.4596, + "step": 396600 + }, + { + "epoch": 10.16, + "learning_rate": 1.6125286060730265e-05, + "loss": 2.272, + "step": 396700 + }, + { + "epoch": 10.16, + "learning_rate": 1.6116746934453666e-05, + "loss": 2.4186, + "step": 396800 + }, + { + "epoch": 10.17, + "learning_rate": 1.6108207808177068e-05, + "loss": 2.3864, + "step": 396900 + }, + { + "epoch": 10.17, + "learning_rate": 1.609966868190047e-05, + "loss": 2.5098, + "step": 397000 + }, + { + "epoch": 10.17, + "learning_rate": 1.6091129555623867e-05, + "loss": 2.2682, + "step": 397100 + }, + { + "epoch": 10.18, + "learning_rate": 1.608259042934727e-05, + "loss": 2.4155, + "step": 397200 + }, + { + "epoch": 10.18, + "learning_rate": 1.607405130307067e-05, + "loss": 2.5484, + "step": 397300 + }, + { + "epoch": 10.18, + "learning_rate": 1.606551217679407e-05, + "loss": 2.3424, + "step": 397400 + }, + { + "epoch": 10.18, + "learning_rate": 1.605697305051747e-05, + "loss": 2.439, + "step": 397500 + }, + { + "epoch": 10.19, + "learning_rate": 1.6048433924240872e-05, + "loss": 2.3458, + "step": 397600 + }, + { + "epoch": 10.19, + "learning_rate": 1.6039894797964273e-05, + "loss": 2.4439, + "step": 397700 + }, + { + "epoch": 10.19, + "learning_rate": 1.603135567168767e-05, + "loss": 2.3554, + "step": 397800 + }, + { + "epoch": 10.19, + "learning_rate": 1.6022816545411073e-05, + "loss": 2.4306, + "step": 397900 + }, + { + "epoch": 10.2, + "learning_rate": 1.6014277419134475e-05, + "loss": 2.4042, + "step": 398000 + }, + { + "epoch": 10.2, + "learning_rate": 1.6005738292857876e-05, + "loss": 2.5338, + "step": 398100 + }, + { + "epoch": 10.2, + "learning_rate": 1.5997199166581274e-05, + "loss": 2.452, + "step": 398200 + }, + { + "epoch": 10.2, + "learning_rate": 1.5988660040304676e-05, + "loss": 2.3639, + "step": 398300 + }, + { + "epoch": 10.21, + "learning_rate": 1.5980120914028077e-05, + "loss": 2.3826, + "step": 398400 + }, + { + "epoch": 10.21, + "learning_rate": 1.597158178775148e-05, + "loss": 2.3524, + "step": 398500 + }, + { + "epoch": 10.21, + "learning_rate": 1.5963042661474877e-05, + "loss": 2.5094, + "step": 398600 + }, + { + "epoch": 10.21, + "learning_rate": 1.595450353519828e-05, + "loss": 2.2688, + "step": 398700 + }, + { + "epoch": 10.22, + "learning_rate": 1.594596440892168e-05, + "loss": 2.3228, + "step": 398800 + }, + { + "epoch": 10.22, + "learning_rate": 1.593742528264508e-05, + "loss": 2.366, + "step": 398900 + }, + { + "epoch": 10.22, + "learning_rate": 1.592888615636848e-05, + "loss": 2.4432, + "step": 399000 + }, + { + "epoch": 10.22, + "learning_rate": 1.592034703009188e-05, + "loss": 2.5214, + "step": 399100 + }, + { + "epoch": 10.23, + "learning_rate": 1.5911807903815283e-05, + "loss": 2.4662, + "step": 399200 + }, + { + "epoch": 10.23, + "learning_rate": 1.590326877753868e-05, + "loss": 2.3677, + "step": 399300 + }, + { + "epoch": 10.23, + "learning_rate": 1.5894729651262083e-05, + "loss": 2.3994, + "step": 399400 + }, + { + "epoch": 10.23, + "learning_rate": 1.5886190524985484e-05, + "loss": 2.4339, + "step": 399500 + }, + { + "epoch": 10.24, + "learning_rate": 1.5877651398708886e-05, + "loss": 2.465, + "step": 399600 + }, + { + "epoch": 10.24, + "learning_rate": 1.5869112272432284e-05, + "loss": 2.5653, + "step": 399700 + }, + { + "epoch": 10.24, + "learning_rate": 1.5860573146155686e-05, + "loss": 2.4402, + "step": 399800 + }, + { + "epoch": 10.24, + "learning_rate": 1.5852034019879087e-05, + "loss": 2.4643, + "step": 399900 + }, + { + "epoch": 10.25, + "learning_rate": 1.5843494893602485e-05, + "loss": 2.4224, + "step": 400000 + }, + { + "epoch": 10.25, + "learning_rate": 1.5834955767325887e-05, + "loss": 2.3672, + "step": 400100 + }, + { + "epoch": 10.25, + "learning_rate": 1.582641664104929e-05, + "loss": 2.3876, + "step": 400200 + }, + { + "epoch": 10.25, + "learning_rate": 1.581787751477269e-05, + "loss": 2.2916, + "step": 400300 + }, + { + "epoch": 10.26, + "learning_rate": 1.5809338388496088e-05, + "loss": 2.4448, + "step": 400400 + }, + { + "epoch": 10.26, + "learning_rate": 1.580079926221949e-05, + "loss": 2.4192, + "step": 400500 + }, + { + "epoch": 10.26, + "learning_rate": 1.579226013594289e-05, + "loss": 2.4131, + "step": 400600 + }, + { + "epoch": 10.26, + "learning_rate": 1.5783721009666293e-05, + "loss": 2.4761, + "step": 400700 + }, + { + "epoch": 10.27, + "learning_rate": 1.577518188338969e-05, + "loss": 2.4109, + "step": 400800 + }, + { + "epoch": 10.27, + "learning_rate": 1.5766642757113092e-05, + "loss": 2.3231, + "step": 400900 + }, + { + "epoch": 10.27, + "learning_rate": 1.5758103630836494e-05, + "loss": 2.3753, + "step": 401000 + }, + { + "epoch": 10.28, + "learning_rate": 1.5749564504559895e-05, + "loss": 2.3626, + "step": 401100 + }, + { + "epoch": 10.28, + "learning_rate": 1.5741025378283294e-05, + "loss": 2.4065, + "step": 401200 + }, + { + "epoch": 10.28, + "learning_rate": 1.5732486252006695e-05, + "loss": 2.3983, + "step": 401300 + }, + { + "epoch": 10.28, + "learning_rate": 1.5723947125730097e-05, + "loss": 2.5158, + "step": 401400 + }, + { + "epoch": 10.29, + "learning_rate": 1.5715407999453495e-05, + "loss": 2.4137, + "step": 401500 + }, + { + "epoch": 10.29, + "learning_rate": 1.5706868873176896e-05, + "loss": 2.5449, + "step": 401600 + }, + { + "epoch": 10.29, + "learning_rate": 1.5698329746900298e-05, + "loss": 2.1586, + "step": 401700 + }, + { + "epoch": 10.29, + "learning_rate": 1.56897906206237e-05, + "loss": 2.2384, + "step": 401800 + }, + { + "epoch": 10.3, + "learning_rate": 1.5681251494347098e-05, + "loss": 2.4827, + "step": 401900 + }, + { + "epoch": 10.3, + "learning_rate": 1.56727123680705e-05, + "loss": 2.6007, + "step": 402000 + }, + { + "epoch": 10.3, + "learning_rate": 1.56641732417939e-05, + "loss": 2.5747, + "step": 402100 + }, + { + "epoch": 10.3, + "learning_rate": 1.5655634115517302e-05, + "loss": 2.3761, + "step": 402200 + }, + { + "epoch": 10.31, + "learning_rate": 1.56470949892407e-05, + "loss": 2.3155, + "step": 402300 + }, + { + "epoch": 10.31, + "learning_rate": 1.5638555862964102e-05, + "loss": 2.4255, + "step": 402400 + }, + { + "epoch": 10.31, + "learning_rate": 1.5630016736687504e-05, + "loss": 2.5638, + "step": 402500 + }, + { + "epoch": 10.31, + "learning_rate": 1.5621477610410902e-05, + "loss": 2.4141, + "step": 402600 + }, + { + "epoch": 10.32, + "learning_rate": 1.5612938484134303e-05, + "loss": 2.506, + "step": 402700 + }, + { + "epoch": 10.32, + "learning_rate": 1.5604399357857705e-05, + "loss": 2.3318, + "step": 402800 + }, + { + "epoch": 10.32, + "learning_rate": 1.5595860231581106e-05, + "loss": 2.5507, + "step": 402900 + }, + { + "epoch": 10.32, + "learning_rate": 1.5587321105304505e-05, + "loss": 2.4233, + "step": 403000 + }, + { + "epoch": 10.33, + "learning_rate": 1.5578781979027906e-05, + "loss": 2.5108, + "step": 403100 + }, + { + "epoch": 10.33, + "learning_rate": 1.5570242852751308e-05, + "loss": 2.3689, + "step": 403200 + }, + { + "epoch": 10.33, + "learning_rate": 1.556170372647471e-05, + "loss": 2.2936, + "step": 403300 + }, + { + "epoch": 10.33, + "learning_rate": 1.5553164600198107e-05, + "loss": 2.3457, + "step": 403400 + }, + { + "epoch": 10.34, + "learning_rate": 1.554462547392151e-05, + "loss": 2.336, + "step": 403500 + }, + { + "epoch": 10.34, + "learning_rate": 1.553608634764491e-05, + "loss": 2.5982, + "step": 403600 + }, + { + "epoch": 10.34, + "learning_rate": 1.5527547221368312e-05, + "loss": 2.3622, + "step": 403700 + }, + { + "epoch": 10.34, + "learning_rate": 1.551900809509171e-05, + "loss": 2.5142, + "step": 403800 + }, + { + "epoch": 10.35, + "learning_rate": 1.5510468968815112e-05, + "loss": 2.3936, + "step": 403900 + }, + { + "epoch": 10.35, + "learning_rate": 1.5501929842538513e-05, + "loss": 2.5448, + "step": 404000 + }, + { + "epoch": 10.35, + "learning_rate": 1.549339071626191e-05, + "loss": 2.3943, + "step": 404100 + }, + { + "epoch": 10.35, + "learning_rate": 1.5484851589985313e-05, + "loss": 2.5677, + "step": 404200 + }, + { + "epoch": 10.36, + "learning_rate": 1.5476312463708715e-05, + "loss": 2.4401, + "step": 404300 + }, + { + "epoch": 10.36, + "learning_rate": 1.5467773337432116e-05, + "loss": 2.34, + "step": 404400 + }, + { + "epoch": 10.36, + "learning_rate": 1.5459234211155514e-05, + "loss": 2.3057, + "step": 404500 + }, + { + "epoch": 10.36, + "learning_rate": 1.5450695084878916e-05, + "loss": 2.4371, + "step": 404600 + }, + { + "epoch": 10.37, + "learning_rate": 1.5442155958602317e-05, + "loss": 2.2235, + "step": 404700 + }, + { + "epoch": 10.37, + "learning_rate": 1.543361683232572e-05, + "loss": 2.3583, + "step": 404800 + }, + { + "epoch": 10.37, + "learning_rate": 1.5425077706049117e-05, + "loss": 2.3671, + "step": 404900 + }, + { + "epoch": 10.38, + "learning_rate": 1.541653857977252e-05, + "loss": 2.4501, + "step": 405000 + }, + { + "epoch": 10.38, + "learning_rate": 1.540799945349592e-05, + "loss": 2.4837, + "step": 405100 + }, + { + "epoch": 10.38, + "learning_rate": 1.539946032721932e-05, + "loss": 2.4959, + "step": 405200 + }, + { + "epoch": 10.38, + "learning_rate": 1.539092120094272e-05, + "loss": 2.5588, + "step": 405300 + }, + { + "epoch": 10.39, + "learning_rate": 1.538238207466612e-05, + "loss": 2.388, + "step": 405400 + }, + { + "epoch": 10.39, + "learning_rate": 1.5373842948389523e-05, + "loss": 2.3401, + "step": 405500 + }, + { + "epoch": 10.39, + "learning_rate": 1.536530382211292e-05, + "loss": 2.513, + "step": 405600 + }, + { + "epoch": 10.39, + "learning_rate": 1.5356764695836323e-05, + "loss": 2.3475, + "step": 405700 + }, + { + "epoch": 10.4, + "learning_rate": 1.5348225569559724e-05, + "loss": 2.3806, + "step": 405800 + }, + { + "epoch": 10.4, + "learning_rate": 1.5339686443283126e-05, + "loss": 2.4245, + "step": 405900 + }, + { + "epoch": 10.4, + "learning_rate": 1.5331147317006524e-05, + "loss": 2.3078, + "step": 406000 + }, + { + "epoch": 10.4, + "learning_rate": 1.5322608190729925e-05, + "loss": 2.578, + "step": 406100 + }, + { + "epoch": 10.41, + "learning_rate": 1.5314069064453327e-05, + "loss": 2.3105, + "step": 406200 + }, + { + "epoch": 10.41, + "learning_rate": 1.530552993817673e-05, + "loss": 2.4754, + "step": 406300 + }, + { + "epoch": 10.41, + "learning_rate": 1.5296990811900127e-05, + "loss": 2.4158, + "step": 406400 + }, + { + "epoch": 10.41, + "learning_rate": 1.5288451685623528e-05, + "loss": 2.5898, + "step": 406500 + }, + { + "epoch": 10.42, + "learning_rate": 1.527991255934693e-05, + "loss": 2.578, + "step": 406600 + }, + { + "epoch": 10.42, + "learning_rate": 1.5271373433070328e-05, + "loss": 2.5228, + "step": 406700 + }, + { + "epoch": 10.42, + "learning_rate": 1.526283430679373e-05, + "loss": 2.3272, + "step": 406800 + }, + { + "epoch": 10.42, + "learning_rate": 1.5254295180517131e-05, + "loss": 2.4769, + "step": 406900 + }, + { + "epoch": 10.43, + "learning_rate": 1.5245756054240531e-05, + "loss": 2.5348, + "step": 407000 + }, + { + "epoch": 10.43, + "learning_rate": 1.5237216927963932e-05, + "loss": 2.4132, + "step": 407100 + }, + { + "epoch": 10.43, + "learning_rate": 1.5228677801687332e-05, + "loss": 2.3936, + "step": 407200 + }, + { + "epoch": 10.43, + "learning_rate": 1.5220138675410734e-05, + "loss": 2.4092, + "step": 407300 + }, + { + "epoch": 10.44, + "learning_rate": 1.5211599549134134e-05, + "loss": 2.3868, + "step": 407400 + }, + { + "epoch": 10.44, + "learning_rate": 1.5203060422857535e-05, + "loss": 2.3178, + "step": 407500 + }, + { + "epoch": 10.44, + "learning_rate": 1.5194521296580935e-05, + "loss": 2.5125, + "step": 407600 + }, + { + "epoch": 10.44, + "learning_rate": 1.5185982170304337e-05, + "loss": 2.7254, + "step": 407700 + }, + { + "epoch": 10.45, + "learning_rate": 1.5177443044027737e-05, + "loss": 2.4515, + "step": 407800 + }, + { + "epoch": 10.45, + "learning_rate": 1.5168903917751136e-05, + "loss": 2.4938, + "step": 407900 + }, + { + "epoch": 10.45, + "learning_rate": 1.5160364791474538e-05, + "loss": 2.396, + "step": 408000 + }, + { + "epoch": 10.45, + "learning_rate": 1.5151825665197938e-05, + "loss": 2.3851, + "step": 408100 + }, + { + "epoch": 10.46, + "learning_rate": 1.514328653892134e-05, + "loss": 2.5115, + "step": 408200 + }, + { + "epoch": 10.46, + "learning_rate": 1.513474741264474e-05, + "loss": 2.4618, + "step": 408300 + }, + { + "epoch": 10.46, + "learning_rate": 1.512620828636814e-05, + "loss": 2.343, + "step": 408400 + }, + { + "epoch": 10.46, + "learning_rate": 1.511766916009154e-05, + "loss": 2.4015, + "step": 408500 + }, + { + "epoch": 10.47, + "learning_rate": 1.5109130033814942e-05, + "loss": 2.3877, + "step": 408600 + }, + { + "epoch": 10.47, + "learning_rate": 1.5100590907538342e-05, + "loss": 2.5644, + "step": 408700 + }, + { + "epoch": 10.47, + "learning_rate": 1.5092051781261744e-05, + "loss": 2.4597, + "step": 408800 + }, + { + "epoch": 10.47, + "learning_rate": 1.5083512654985143e-05, + "loss": 2.5105, + "step": 408900 + }, + { + "epoch": 10.48, + "learning_rate": 1.5074973528708545e-05, + "loss": 2.4664, + "step": 409000 + }, + { + "epoch": 10.48, + "learning_rate": 1.5066434402431945e-05, + "loss": 2.5024, + "step": 409100 + }, + { + "epoch": 10.48, + "learning_rate": 1.5057895276155345e-05, + "loss": 2.3704, + "step": 409200 + }, + { + "epoch": 10.49, + "learning_rate": 1.5049356149878746e-05, + "loss": 2.5143, + "step": 409300 + }, + { + "epoch": 10.49, + "learning_rate": 1.5040817023602146e-05, + "loss": 2.3204, + "step": 409400 + }, + { + "epoch": 10.49, + "learning_rate": 1.5032277897325548e-05, + "loss": 2.4267, + "step": 409500 + }, + { + "epoch": 10.49, + "learning_rate": 1.5023738771048947e-05, + "loss": 2.4022, + "step": 409600 + }, + { + "epoch": 10.5, + "learning_rate": 1.5015199644772349e-05, + "loss": 2.4902, + "step": 409700 + }, + { + "epoch": 10.5, + "learning_rate": 1.5006660518495749e-05, + "loss": 2.4062, + "step": 409800 + }, + { + "epoch": 10.5, + "learning_rate": 1.499812139221915e-05, + "loss": 2.3848, + "step": 409900 + }, + { + "epoch": 10.5, + "learning_rate": 1.498958226594255e-05, + "loss": 2.3646, + "step": 410000 + }, + { + "epoch": 10.51, + "learning_rate": 1.4981043139665952e-05, + "loss": 2.4942, + "step": 410100 + }, + { + "epoch": 10.51, + "learning_rate": 1.4972504013389352e-05, + "loss": 2.5237, + "step": 410200 + }, + { + "epoch": 10.51, + "learning_rate": 1.4963964887112753e-05, + "loss": 2.4084, + "step": 410300 + }, + { + "epoch": 10.51, + "learning_rate": 1.4955425760836153e-05, + "loss": 2.4429, + "step": 410400 + }, + { + "epoch": 10.52, + "learning_rate": 1.4946886634559553e-05, + "loss": 2.3814, + "step": 410500 + }, + { + "epoch": 10.52, + "learning_rate": 1.4938347508282954e-05, + "loss": 2.6651, + "step": 410600 + }, + { + "epoch": 10.52, + "learning_rate": 1.4929808382006354e-05, + "loss": 2.3788, + "step": 410700 + }, + { + "epoch": 10.52, + "learning_rate": 1.4921269255729756e-05, + "loss": 2.3782, + "step": 410800 + }, + { + "epoch": 10.53, + "learning_rate": 1.4912730129453156e-05, + "loss": 2.444, + "step": 410900 + }, + { + "epoch": 10.53, + "learning_rate": 1.4904191003176557e-05, + "loss": 2.4786, + "step": 411000 + }, + { + "epoch": 10.53, + "learning_rate": 1.4895651876899957e-05, + "loss": 2.408, + "step": 411100 + }, + { + "epoch": 10.53, + "learning_rate": 1.4887112750623359e-05, + "loss": 2.348, + "step": 411200 + }, + { + "epoch": 10.54, + "learning_rate": 1.4878573624346759e-05, + "loss": 2.4413, + "step": 411300 + }, + { + "epoch": 10.54, + "learning_rate": 1.487003449807016e-05, + "loss": 2.2968, + "step": 411400 + }, + { + "epoch": 10.54, + "learning_rate": 1.486149537179356e-05, + "loss": 2.4226, + "step": 411500 + }, + { + "epoch": 10.54, + "learning_rate": 1.4852956245516961e-05, + "loss": 2.4854, + "step": 411600 + }, + { + "epoch": 10.55, + "learning_rate": 1.4844417119240361e-05, + "loss": 2.3968, + "step": 411700 + }, + { + "epoch": 10.55, + "learning_rate": 1.4835877992963761e-05, + "loss": 2.4726, + "step": 411800 + }, + { + "epoch": 10.55, + "learning_rate": 1.4827338866687163e-05, + "loss": 2.4465, + "step": 411900 + }, + { + "epoch": 10.55, + "learning_rate": 1.4818799740410561e-05, + "loss": 2.4776, + "step": 412000 + }, + { + "epoch": 10.56, + "learning_rate": 1.481026061413396e-05, + "loss": 2.3093, + "step": 412100 + }, + { + "epoch": 10.56, + "learning_rate": 1.4801721487857362e-05, + "loss": 2.4531, + "step": 412200 + }, + { + "epoch": 10.56, + "learning_rate": 1.4793182361580762e-05, + "loss": 2.462, + "step": 412300 + }, + { + "epoch": 10.56, + "learning_rate": 1.4784643235304164e-05, + "loss": 2.4598, + "step": 412400 + }, + { + "epoch": 10.57, + "learning_rate": 1.4776104109027564e-05, + "loss": 2.3987, + "step": 412500 + }, + { + "epoch": 10.57, + "learning_rate": 1.4767564982750965e-05, + "loss": 2.5386, + "step": 412600 + }, + { + "epoch": 10.57, + "learning_rate": 1.4759025856474365e-05, + "loss": 2.4256, + "step": 412700 + }, + { + "epoch": 10.57, + "learning_rate": 1.4750486730197765e-05, + "loss": 2.4934, + "step": 412800 + }, + { + "epoch": 10.58, + "learning_rate": 1.4741947603921166e-05, + "loss": 2.4629, + "step": 412900 + }, + { + "epoch": 10.58, + "learning_rate": 1.4733408477644566e-05, + "loss": 2.6451, + "step": 413000 + }, + { + "epoch": 10.58, + "learning_rate": 1.4724869351367968e-05, + "loss": 2.2613, + "step": 413100 + }, + { + "epoch": 10.59, + "learning_rate": 1.4716330225091368e-05, + "loss": 2.4202, + "step": 413200 + }, + { + "epoch": 10.59, + "learning_rate": 1.470779109881477e-05, + "loss": 2.5201, + "step": 413300 + }, + { + "epoch": 10.59, + "learning_rate": 1.4699251972538169e-05, + "loss": 2.5148, + "step": 413400 + }, + { + "epoch": 10.59, + "learning_rate": 1.469071284626157e-05, + "loss": 2.4361, + "step": 413500 + }, + { + "epoch": 10.6, + "learning_rate": 1.468217371998497e-05, + "loss": 2.2988, + "step": 413600 + }, + { + "epoch": 10.6, + "learning_rate": 1.4673634593708372e-05, + "loss": 2.3311, + "step": 413700 + }, + { + "epoch": 10.6, + "learning_rate": 1.4665095467431772e-05, + "loss": 2.5351, + "step": 413800 + }, + { + "epoch": 10.6, + "learning_rate": 1.4656556341155173e-05, + "loss": 2.4125, + "step": 413900 + }, + { + "epoch": 10.61, + "learning_rate": 1.4648017214878573e-05, + "loss": 2.3415, + "step": 414000 + }, + { + "epoch": 10.61, + "learning_rate": 1.4639478088601973e-05, + "loss": 2.4118, + "step": 414100 + }, + { + "epoch": 10.61, + "learning_rate": 1.4630938962325375e-05, + "loss": 2.3756, + "step": 414200 + }, + { + "epoch": 10.61, + "learning_rate": 1.4622399836048774e-05, + "loss": 2.5816, + "step": 414300 + }, + { + "epoch": 10.62, + "learning_rate": 1.4613860709772176e-05, + "loss": 2.4063, + "step": 414400 + }, + { + "epoch": 10.62, + "learning_rate": 1.4605321583495576e-05, + "loss": 2.5107, + "step": 414500 + }, + { + "epoch": 10.62, + "learning_rate": 1.4596782457218977e-05, + "loss": 2.3821, + "step": 414600 + }, + { + "epoch": 10.62, + "learning_rate": 1.4588243330942377e-05, + "loss": 2.4562, + "step": 414700 + }, + { + "epoch": 10.63, + "learning_rate": 1.4579704204665779e-05, + "loss": 2.3442, + "step": 414800 + }, + { + "epoch": 10.63, + "learning_rate": 1.4571165078389179e-05, + "loss": 2.4321, + "step": 414900 + }, + { + "epoch": 10.63, + "learning_rate": 1.456262595211258e-05, + "loss": 2.2946, + "step": 415000 + }, + { + "epoch": 10.63, + "learning_rate": 1.455408682583598e-05, + "loss": 2.2737, + "step": 415100 + }, + { + "epoch": 10.64, + "learning_rate": 1.4545547699559382e-05, + "loss": 2.4658, + "step": 415200 + }, + { + "epoch": 10.64, + "learning_rate": 1.4537008573282782e-05, + "loss": 2.4046, + "step": 415300 + }, + { + "epoch": 10.64, + "learning_rate": 1.4528469447006181e-05, + "loss": 2.443, + "step": 415400 + }, + { + "epoch": 10.64, + "learning_rate": 1.4519930320729583e-05, + "loss": 2.4225, + "step": 415500 + }, + { + "epoch": 10.65, + "learning_rate": 1.4511391194452983e-05, + "loss": 2.454, + "step": 415600 + }, + { + "epoch": 10.65, + "learning_rate": 1.4502852068176384e-05, + "loss": 2.5816, + "step": 415700 + }, + { + "epoch": 10.65, + "learning_rate": 1.4494312941899784e-05, + "loss": 2.3252, + "step": 415800 + }, + { + "epoch": 10.65, + "learning_rate": 1.4485773815623186e-05, + "loss": 2.192, + "step": 415900 + }, + { + "epoch": 10.66, + "learning_rate": 1.4477234689346586e-05, + "loss": 2.4979, + "step": 416000 + }, + { + "epoch": 10.66, + "learning_rate": 1.4468695563069987e-05, + "loss": 2.4697, + "step": 416100 + }, + { + "epoch": 10.66, + "learning_rate": 1.4460156436793387e-05, + "loss": 2.4549, + "step": 416200 + }, + { + "epoch": 10.66, + "learning_rate": 1.4451617310516789e-05, + "loss": 2.353, + "step": 416300 + }, + { + "epoch": 10.67, + "learning_rate": 1.4443078184240188e-05, + "loss": 2.3207, + "step": 416400 + }, + { + "epoch": 10.67, + "learning_rate": 1.443453905796359e-05, + "loss": 2.4548, + "step": 416500 + }, + { + "epoch": 10.67, + "learning_rate": 1.442599993168699e-05, + "loss": 2.54, + "step": 416600 + }, + { + "epoch": 10.67, + "learning_rate": 1.441746080541039e-05, + "loss": 2.5123, + "step": 416700 + }, + { + "epoch": 10.68, + "learning_rate": 1.4408921679133791e-05, + "loss": 2.3531, + "step": 416800 + }, + { + "epoch": 10.68, + "learning_rate": 1.4400382552857191e-05, + "loss": 2.5471, + "step": 416900 + }, + { + "epoch": 10.68, + "learning_rate": 1.4391843426580593e-05, + "loss": 2.3131, + "step": 417000 + }, + { + "epoch": 10.69, + "learning_rate": 1.4383304300303992e-05, + "loss": 2.3687, + "step": 417100 + }, + { + "epoch": 10.69, + "learning_rate": 1.4374765174027394e-05, + "loss": 2.5053, + "step": 417200 + }, + { + "epoch": 10.69, + "learning_rate": 1.4366226047750794e-05, + "loss": 2.3076, + "step": 417300 + }, + { + "epoch": 10.69, + "learning_rate": 1.4357686921474195e-05, + "loss": 2.5531, + "step": 417400 + }, + { + "epoch": 10.7, + "learning_rate": 1.4349147795197595e-05, + "loss": 2.5181, + "step": 417500 + }, + { + "epoch": 10.7, + "learning_rate": 1.4340608668920997e-05, + "loss": 2.4228, + "step": 417600 + }, + { + "epoch": 10.7, + "learning_rate": 1.4332069542644397e-05, + "loss": 2.529, + "step": 417700 + }, + { + "epoch": 10.7, + "learning_rate": 1.4323530416367798e-05, + "loss": 2.3639, + "step": 417800 + }, + { + "epoch": 10.71, + "learning_rate": 1.4314991290091198e-05, + "loss": 2.301, + "step": 417900 + }, + { + "epoch": 10.71, + "learning_rate": 1.43064521638146e-05, + "loss": 2.5247, + "step": 418000 + }, + { + "epoch": 10.71, + "learning_rate": 1.4297913037538e-05, + "loss": 2.4488, + "step": 418100 + }, + { + "epoch": 10.71, + "learning_rate": 1.42893739112614e-05, + "loss": 2.5862, + "step": 418200 + }, + { + "epoch": 10.72, + "learning_rate": 1.42808347849848e-05, + "loss": 2.1738, + "step": 418300 + }, + { + "epoch": 10.72, + "learning_rate": 1.42722956587082e-05, + "loss": 2.4319, + "step": 418400 + }, + { + "epoch": 10.72, + "learning_rate": 1.4263756532431602e-05, + "loss": 2.3571, + "step": 418500 + }, + { + "epoch": 10.72, + "learning_rate": 1.4255217406155002e-05, + "loss": 2.3887, + "step": 418600 + }, + { + "epoch": 10.73, + "learning_rate": 1.4246678279878404e-05, + "loss": 2.5333, + "step": 418700 + }, + { + "epoch": 10.73, + "learning_rate": 1.4238139153601804e-05, + "loss": 2.3024, + "step": 418800 + }, + { + "epoch": 10.73, + "learning_rate": 1.4229600027325205e-05, + "loss": 2.4367, + "step": 418900 + }, + { + "epoch": 10.73, + "learning_rate": 1.4221060901048605e-05, + "loss": 2.4616, + "step": 419000 + }, + { + "epoch": 10.74, + "learning_rate": 1.4212521774772006e-05, + "loss": 2.5386, + "step": 419100 + }, + { + "epoch": 10.74, + "learning_rate": 1.4203982648495406e-05, + "loss": 2.4597, + "step": 419200 + }, + { + "epoch": 10.74, + "learning_rate": 1.4195443522218808e-05, + "loss": 2.3248, + "step": 419300 + }, + { + "epoch": 10.74, + "learning_rate": 1.4186904395942208e-05, + "loss": 2.5811, + "step": 419400 + }, + { + "epoch": 10.75, + "learning_rate": 1.4178365269665608e-05, + "loss": 2.3857, + "step": 419500 + }, + { + "epoch": 10.75, + "learning_rate": 1.4169826143389009e-05, + "loss": 2.2709, + "step": 419600 + }, + { + "epoch": 10.75, + "learning_rate": 1.4161287017112409e-05, + "loss": 2.4978, + "step": 419700 + }, + { + "epoch": 10.75, + "learning_rate": 1.415274789083581e-05, + "loss": 2.4434, + "step": 419800 + }, + { + "epoch": 10.76, + "learning_rate": 1.414420876455921e-05, + "loss": 2.4206, + "step": 419900 + }, + { + "epoch": 10.76, + "learning_rate": 1.4135669638282612e-05, + "loss": 2.3937, + "step": 420000 + }, + { + "epoch": 10.76, + "learning_rate": 1.4127130512006012e-05, + "loss": 2.3275, + "step": 420100 + }, + { + "epoch": 10.76, + "learning_rate": 1.4118591385729413e-05, + "loss": 2.3028, + "step": 420200 + }, + { + "epoch": 10.77, + "learning_rate": 1.4110052259452813e-05, + "loss": 2.458, + "step": 420300 + }, + { + "epoch": 10.77, + "learning_rate": 1.4101513133176215e-05, + "loss": 2.5485, + "step": 420400 + }, + { + "epoch": 10.77, + "learning_rate": 1.4092974006899615e-05, + "loss": 2.4615, + "step": 420500 + }, + { + "epoch": 10.77, + "learning_rate": 1.4084434880623016e-05, + "loss": 2.4002, + "step": 420600 + }, + { + "epoch": 10.78, + "learning_rate": 1.4075895754346416e-05, + "loss": 2.3733, + "step": 420700 + }, + { + "epoch": 10.78, + "learning_rate": 1.4067356628069816e-05, + "loss": 2.4245, + "step": 420800 + }, + { + "epoch": 10.78, + "learning_rate": 1.4058817501793217e-05, + "loss": 2.3932, + "step": 420900 + }, + { + "epoch": 10.78, + "learning_rate": 1.4050278375516617e-05, + "loss": 2.5509, + "step": 421000 + }, + { + "epoch": 10.79, + "learning_rate": 1.4041739249240019e-05, + "loss": 2.3252, + "step": 421100 + }, + { + "epoch": 10.79, + "learning_rate": 1.4033200122963419e-05, + "loss": 2.4302, + "step": 421200 + }, + { + "epoch": 10.79, + "learning_rate": 1.402466099668682e-05, + "loss": 2.4215, + "step": 421300 + }, + { + "epoch": 10.8, + "learning_rate": 1.401612187041022e-05, + "loss": 2.314, + "step": 421400 + }, + { + "epoch": 10.8, + "learning_rate": 1.4007582744133622e-05, + "loss": 2.3784, + "step": 421500 + }, + { + "epoch": 10.8, + "learning_rate": 1.3999043617857021e-05, + "loss": 2.4432, + "step": 421600 + }, + { + "epoch": 10.8, + "learning_rate": 1.3990504491580423e-05, + "loss": 2.2759, + "step": 421700 + }, + { + "epoch": 10.81, + "learning_rate": 1.3981965365303823e-05, + "loss": 2.2875, + "step": 421800 + }, + { + "epoch": 10.81, + "learning_rate": 1.3973426239027224e-05, + "loss": 2.3403, + "step": 421900 + }, + { + "epoch": 10.81, + "learning_rate": 1.3964887112750624e-05, + "loss": 2.4533, + "step": 422000 + }, + { + "epoch": 10.81, + "learning_rate": 1.3956347986474024e-05, + "loss": 2.3412, + "step": 422100 + }, + { + "epoch": 10.82, + "learning_rate": 1.3947808860197426e-05, + "loss": 2.5796, + "step": 422200 + }, + { + "epoch": 10.82, + "learning_rate": 1.3939269733920826e-05, + "loss": 2.275, + "step": 422300 + }, + { + "epoch": 10.82, + "learning_rate": 1.3930730607644227e-05, + "loss": 2.3748, + "step": 422400 + }, + { + "epoch": 10.82, + "learning_rate": 1.3922191481367627e-05, + "loss": 2.5889, + "step": 422500 + }, + { + "epoch": 10.83, + "learning_rate": 1.3913652355091028e-05, + "loss": 2.5048, + "step": 422600 + }, + { + "epoch": 10.83, + "learning_rate": 1.3905113228814428e-05, + "loss": 2.4468, + "step": 422700 + }, + { + "epoch": 10.83, + "learning_rate": 1.389657410253783e-05, + "loss": 2.229, + "step": 422800 + }, + { + "epoch": 10.83, + "learning_rate": 1.388803497626123e-05, + "loss": 2.4576, + "step": 422900 + }, + { + "epoch": 10.84, + "learning_rate": 1.3879495849984631e-05, + "loss": 2.4826, + "step": 423000 + }, + { + "epoch": 10.84, + "learning_rate": 1.3870956723708031e-05, + "loss": 2.436, + "step": 423100 + }, + { + "epoch": 10.84, + "learning_rate": 1.3862417597431433e-05, + "loss": 2.3486, + "step": 423200 + }, + { + "epoch": 10.84, + "learning_rate": 1.3853878471154833e-05, + "loss": 2.4808, + "step": 423300 + }, + { + "epoch": 10.85, + "learning_rate": 1.3845339344878232e-05, + "loss": 2.5013, + "step": 423400 + }, + { + "epoch": 10.85, + "learning_rate": 1.3836800218601634e-05, + "loss": 2.4706, + "step": 423500 + }, + { + "epoch": 10.85, + "learning_rate": 1.3828261092325034e-05, + "loss": 2.3887, + "step": 423600 + }, + { + "epoch": 10.85, + "learning_rate": 1.3819721966048435e-05, + "loss": 2.3089, + "step": 423700 + }, + { + "epoch": 10.86, + "learning_rate": 1.3811182839771835e-05, + "loss": 2.3603, + "step": 423800 + }, + { + "epoch": 10.86, + "learning_rate": 1.3802643713495237e-05, + "loss": 2.2136, + "step": 423900 + }, + { + "epoch": 10.86, + "learning_rate": 1.3794104587218637e-05, + "loss": 2.3481, + "step": 424000 + }, + { + "epoch": 10.86, + "learning_rate": 1.3785565460942038e-05, + "loss": 2.2901, + "step": 424100 + }, + { + "epoch": 10.87, + "learning_rate": 1.3777026334665438e-05, + "loss": 2.3176, + "step": 424200 + }, + { + "epoch": 10.87, + "learning_rate": 1.376848720838884e-05, + "loss": 2.4147, + "step": 424300 + }, + { + "epoch": 10.87, + "learning_rate": 1.375994808211224e-05, + "loss": 2.5023, + "step": 424400 + }, + { + "epoch": 10.87, + "learning_rate": 1.3751408955835641e-05, + "loss": 2.468, + "step": 424500 + }, + { + "epoch": 10.88, + "learning_rate": 1.374286982955904e-05, + "loss": 2.3227, + "step": 424600 + }, + { + "epoch": 10.88, + "learning_rate": 1.373433070328244e-05, + "loss": 2.3984, + "step": 424700 + }, + { + "epoch": 10.88, + "learning_rate": 1.3725791577005842e-05, + "loss": 2.393, + "step": 424800 + }, + { + "epoch": 10.88, + "learning_rate": 1.3717252450729242e-05, + "loss": 2.4668, + "step": 424900 + }, + { + "epoch": 10.89, + "learning_rate": 1.3708713324452644e-05, + "loss": 2.5237, + "step": 425000 + }, + { + "epoch": 10.89, + "learning_rate": 1.3700174198176043e-05, + "loss": 2.3748, + "step": 425100 + }, + { + "epoch": 10.89, + "learning_rate": 1.3691635071899445e-05, + "loss": 2.3424, + "step": 425200 + }, + { + "epoch": 10.9, + "learning_rate": 1.3683095945622845e-05, + "loss": 2.4823, + "step": 425300 + }, + { + "epoch": 10.9, + "learning_rate": 1.3674556819346246e-05, + "loss": 2.4264, + "step": 425400 + }, + { + "epoch": 10.9, + "learning_rate": 1.3666017693069646e-05, + "loss": 2.459, + "step": 425500 + }, + { + "epoch": 10.9, + "learning_rate": 1.3657478566793048e-05, + "loss": 2.4273, + "step": 425600 + }, + { + "epoch": 10.91, + "learning_rate": 1.3648939440516448e-05, + "loss": 2.3875, + "step": 425700 + }, + { + "epoch": 10.91, + "learning_rate": 1.364040031423985e-05, + "loss": 2.39, + "step": 425800 + }, + { + "epoch": 10.91, + "learning_rate": 1.3631861187963249e-05, + "loss": 2.5833, + "step": 425900 + }, + { + "epoch": 10.91, + "learning_rate": 1.3623322061686649e-05, + "loss": 2.5368, + "step": 426000 + }, + { + "epoch": 10.92, + "learning_rate": 1.361478293541005e-05, + "loss": 2.2617, + "step": 426100 + }, + { + "epoch": 10.92, + "learning_rate": 1.360624380913345e-05, + "loss": 2.4038, + "step": 426200 + }, + { + "epoch": 10.92, + "learning_rate": 1.3597704682856852e-05, + "loss": 2.3052, + "step": 426300 + }, + { + "epoch": 10.92, + "learning_rate": 1.3589165556580252e-05, + "loss": 2.3489, + "step": 426400 + }, + { + "epoch": 10.93, + "learning_rate": 1.3580626430303653e-05, + "loss": 2.4012, + "step": 426500 + }, + { + "epoch": 10.93, + "learning_rate": 1.3572087304027053e-05, + "loss": 2.3726, + "step": 426600 + }, + { + "epoch": 10.93, + "learning_rate": 1.3563548177750455e-05, + "loss": 2.3671, + "step": 426700 + }, + { + "epoch": 10.93, + "learning_rate": 1.3555009051473855e-05, + "loss": 2.3331, + "step": 426800 + }, + { + "epoch": 10.94, + "learning_rate": 1.3546469925197256e-05, + "loss": 2.2823, + "step": 426900 + }, + { + "epoch": 10.94, + "learning_rate": 1.3537930798920656e-05, + "loss": 2.4966, + "step": 427000 + }, + { + "epoch": 10.94, + "learning_rate": 1.3529391672644057e-05, + "loss": 2.233, + "step": 427100 + }, + { + "epoch": 10.94, + "learning_rate": 1.3520852546367457e-05, + "loss": 2.5499, + "step": 427200 + }, + { + "epoch": 10.95, + "learning_rate": 1.3512313420090857e-05, + "loss": 2.4112, + "step": 427300 + }, + { + "epoch": 10.95, + "learning_rate": 1.3503774293814259e-05, + "loss": 2.2821, + "step": 427400 + }, + { + "epoch": 10.95, + "learning_rate": 1.3495235167537659e-05, + "loss": 2.3467, + "step": 427500 + }, + { + "epoch": 10.95, + "learning_rate": 1.348669604126106e-05, + "loss": 2.436, + "step": 427600 + }, + { + "epoch": 10.96, + "learning_rate": 1.347815691498446e-05, + "loss": 2.3273, + "step": 427700 + }, + { + "epoch": 10.96, + "learning_rate": 1.3469617788707862e-05, + "loss": 2.3415, + "step": 427800 + }, + { + "epoch": 10.96, + "learning_rate": 1.3461078662431261e-05, + "loss": 2.4805, + "step": 427900 + }, + { + "epoch": 10.96, + "learning_rate": 1.3452539536154663e-05, + "loss": 2.1796, + "step": 428000 + }, + { + "epoch": 10.97, + "learning_rate": 1.3444000409878063e-05, + "loss": 2.2917, + "step": 428100 + }, + { + "epoch": 10.97, + "learning_rate": 1.3435461283601464e-05, + "loss": 2.3663, + "step": 428200 + }, + { + "epoch": 10.97, + "learning_rate": 1.3426922157324864e-05, + "loss": 2.3382, + "step": 428300 + }, + { + "epoch": 10.97, + "learning_rate": 1.3418383031048266e-05, + "loss": 2.5369, + "step": 428400 + }, + { + "epoch": 10.98, + "learning_rate": 1.3409843904771666e-05, + "loss": 2.4268, + "step": 428500 + }, + { + "epoch": 10.98, + "learning_rate": 1.3401304778495065e-05, + "loss": 2.6242, + "step": 428600 + }, + { + "epoch": 10.98, + "learning_rate": 1.3392765652218467e-05, + "loss": 2.2714, + "step": 428700 + }, + { + "epoch": 10.98, + "learning_rate": 1.3384226525941867e-05, + "loss": 2.5422, + "step": 428800 + }, + { + "epoch": 10.99, + "learning_rate": 1.3375687399665268e-05, + "loss": 2.4616, + "step": 428900 + }, + { + "epoch": 10.99, + "learning_rate": 1.3367148273388668e-05, + "loss": 2.47, + "step": 429000 + }, + { + "epoch": 10.99, + "learning_rate": 1.335860914711207e-05, + "loss": 2.4516, + "step": 429100 + }, + { + "epoch": 10.99, + "learning_rate": 1.335007002083547e-05, + "loss": 2.26, + "step": 429200 + }, + { + "epoch": 11.0, + "learning_rate": 1.3341530894558871e-05, + "loss": 2.2387, + "step": 429300 + }, + { + "epoch": 11.0, + "learning_rate": 1.3332991768282271e-05, + "loss": 2.3875, + "step": 429400 + }, + { + "epoch": 11.0, + "learning_rate": 1.3324452642005673e-05, + "loss": 2.441, + "step": 429500 + }, + { + "epoch": 11.01, + "learning_rate": 1.3315913515729069e-05, + "loss": 2.4267, + "step": 429600 + }, + { + "epoch": 11.01, + "learning_rate": 1.330737438945247e-05, + "loss": 2.5106, + "step": 429700 + }, + { + "epoch": 11.01, + "learning_rate": 1.329883526317587e-05, + "loss": 2.2311, + "step": 429800 + }, + { + "epoch": 11.01, + "learning_rate": 1.3290296136899272e-05, + "loss": 2.3798, + "step": 429900 + }, + { + "epoch": 11.02, + "learning_rate": 1.3281757010622672e-05, + "loss": 2.313, + "step": 430000 + }, + { + "epoch": 11.02, + "learning_rate": 1.3273217884346073e-05, + "loss": 2.2904, + "step": 430100 + }, + { + "epoch": 11.02, + "learning_rate": 1.3264678758069473e-05, + "loss": 2.461, + "step": 430200 + }, + { + "epoch": 11.02, + "learning_rate": 1.3256139631792875e-05, + "loss": 2.422, + "step": 430300 + }, + { + "epoch": 11.03, + "learning_rate": 1.3247600505516275e-05, + "loss": 2.5934, + "step": 430400 + }, + { + "epoch": 11.03, + "learning_rate": 1.3239061379239676e-05, + "loss": 2.4207, + "step": 430500 + }, + { + "epoch": 11.03, + "learning_rate": 1.3230522252963076e-05, + "loss": 2.3234, + "step": 430600 + }, + { + "epoch": 11.03, + "learning_rate": 1.3221983126686478e-05, + "loss": 2.2679, + "step": 430700 + }, + { + "epoch": 11.04, + "learning_rate": 1.3213444000409877e-05, + "loss": 2.5224, + "step": 430800 + }, + { + "epoch": 11.04, + "learning_rate": 1.3204904874133277e-05, + "loss": 2.6412, + "step": 430900 + }, + { + "epoch": 11.04, + "learning_rate": 1.3196365747856679e-05, + "loss": 2.3197, + "step": 431000 + }, + { + "epoch": 11.04, + "learning_rate": 1.3187826621580079e-05, + "loss": 2.3307, + "step": 431100 + }, + { + "epoch": 11.05, + "learning_rate": 1.317928749530348e-05, + "loss": 2.3558, + "step": 431200 + }, + { + "epoch": 11.05, + "learning_rate": 1.317074836902688e-05, + "loss": 2.4468, + "step": 431300 + }, + { + "epoch": 11.05, + "learning_rate": 1.3162209242750282e-05, + "loss": 2.2056, + "step": 431400 + }, + { + "epoch": 11.05, + "learning_rate": 1.3153670116473682e-05, + "loss": 2.4161, + "step": 431500 + }, + { + "epoch": 11.06, + "learning_rate": 1.3145130990197083e-05, + "loss": 2.3587, + "step": 431600 + }, + { + "epoch": 11.06, + "learning_rate": 1.3136591863920483e-05, + "loss": 2.488, + "step": 431700 + }, + { + "epoch": 11.06, + "learning_rate": 1.3128052737643884e-05, + "loss": 2.3696, + "step": 431800 + }, + { + "epoch": 11.06, + "learning_rate": 1.3119513611367284e-05, + "loss": 2.4558, + "step": 431900 + }, + { + "epoch": 11.07, + "learning_rate": 1.3110974485090686e-05, + "loss": 2.3515, + "step": 432000 + }, + { + "epoch": 11.07, + "learning_rate": 1.3102435358814086e-05, + "loss": 2.1881, + "step": 432100 + }, + { + "epoch": 11.07, + "learning_rate": 1.3093896232537486e-05, + "loss": 2.3457, + "step": 432200 + }, + { + "epoch": 11.07, + "learning_rate": 1.3085357106260887e-05, + "loss": 2.3209, + "step": 432300 + }, + { + "epoch": 11.08, + "learning_rate": 1.3076817979984287e-05, + "loss": 2.3805, + "step": 432400 + }, + { + "epoch": 11.08, + "learning_rate": 1.3068278853707689e-05, + "loss": 2.4787, + "step": 432500 + }, + { + "epoch": 11.08, + "learning_rate": 1.3059739727431088e-05, + "loss": 2.4628, + "step": 432600 + }, + { + "epoch": 11.08, + "learning_rate": 1.305120060115449e-05, + "loss": 2.4036, + "step": 432700 + }, + { + "epoch": 11.09, + "learning_rate": 1.304266147487789e-05, + "loss": 2.2851, + "step": 432800 + }, + { + "epoch": 11.09, + "learning_rate": 1.3034122348601291e-05, + "loss": 2.3833, + "step": 432900 + }, + { + "epoch": 11.09, + "learning_rate": 1.3025583222324691e-05, + "loss": 2.2168, + "step": 433000 + }, + { + "epoch": 11.09, + "learning_rate": 1.3017044096048093e-05, + "loss": 2.4524, + "step": 433100 + }, + { + "epoch": 11.1, + "learning_rate": 1.3008504969771493e-05, + "loss": 2.3643, + "step": 433200 + }, + { + "epoch": 11.1, + "learning_rate": 1.2999965843494894e-05, + "loss": 2.335, + "step": 433300 + }, + { + "epoch": 11.1, + "learning_rate": 1.2991426717218294e-05, + "loss": 2.457, + "step": 433400 + }, + { + "epoch": 11.11, + "learning_rate": 1.2982887590941694e-05, + "loss": 2.2603, + "step": 433500 + }, + { + "epoch": 11.11, + "learning_rate": 1.2974348464665095e-05, + "loss": 2.3553, + "step": 433600 + }, + { + "epoch": 11.11, + "learning_rate": 1.2965809338388495e-05, + "loss": 2.4858, + "step": 433700 + }, + { + "epoch": 11.11, + "learning_rate": 1.2957270212111897e-05, + "loss": 2.5419, + "step": 433800 + }, + { + "epoch": 11.12, + "learning_rate": 1.2948731085835297e-05, + "loss": 2.3164, + "step": 433900 + }, + { + "epoch": 11.12, + "learning_rate": 1.2940191959558698e-05, + "loss": 2.333, + "step": 434000 + }, + { + "epoch": 11.12, + "learning_rate": 1.2931652833282098e-05, + "loss": 2.2533, + "step": 434100 + }, + { + "epoch": 11.12, + "learning_rate": 1.29231137070055e-05, + "loss": 2.5749, + "step": 434200 + }, + { + "epoch": 11.13, + "learning_rate": 1.29145745807289e-05, + "loss": 2.4057, + "step": 434300 + }, + { + "epoch": 11.13, + "learning_rate": 1.2906035454452301e-05, + "loss": 2.3012, + "step": 434400 + }, + { + "epoch": 11.13, + "learning_rate": 1.2897496328175701e-05, + "loss": 2.4791, + "step": 434500 + }, + { + "epoch": 11.13, + "learning_rate": 1.2888957201899102e-05, + "loss": 2.4412, + "step": 434600 + }, + { + "epoch": 11.14, + "learning_rate": 1.2880418075622502e-05, + "loss": 2.3738, + "step": 434700 + }, + { + "epoch": 11.14, + "learning_rate": 1.2871878949345904e-05, + "loss": 2.3775, + "step": 434800 + }, + { + "epoch": 11.14, + "learning_rate": 1.2863339823069304e-05, + "loss": 2.391, + "step": 434900 + }, + { + "epoch": 11.14, + "learning_rate": 1.2854800696792704e-05, + "loss": 2.437, + "step": 435000 + }, + { + "epoch": 11.15, + "learning_rate": 1.2846261570516105e-05, + "loss": 2.4915, + "step": 435100 + }, + { + "epoch": 11.15, + "learning_rate": 1.2837722444239505e-05, + "loss": 2.5653, + "step": 435200 + }, + { + "epoch": 11.15, + "learning_rate": 1.2829183317962906e-05, + "loss": 2.3941, + "step": 435300 + }, + { + "epoch": 11.15, + "learning_rate": 1.2820644191686306e-05, + "loss": 2.6094, + "step": 435400 + }, + { + "epoch": 11.16, + "learning_rate": 1.2812105065409708e-05, + "loss": 2.3746, + "step": 435500 + }, + { + "epoch": 11.16, + "learning_rate": 1.2803565939133108e-05, + "loss": 2.5318, + "step": 435600 + }, + { + "epoch": 11.16, + "learning_rate": 1.279502681285651e-05, + "loss": 2.3462, + "step": 435700 + }, + { + "epoch": 11.16, + "learning_rate": 1.2786487686579909e-05, + "loss": 2.3414, + "step": 435800 + }, + { + "epoch": 11.17, + "learning_rate": 1.277794856030331e-05, + "loss": 2.4234, + "step": 435900 + }, + { + "epoch": 11.17, + "learning_rate": 1.276940943402671e-05, + "loss": 2.3881, + "step": 436000 + }, + { + "epoch": 11.17, + "learning_rate": 1.2760870307750112e-05, + "loss": 2.4492, + "step": 436100 + }, + { + "epoch": 11.17, + "learning_rate": 1.2752331181473512e-05, + "loss": 2.3043, + "step": 436200 + }, + { + "epoch": 11.18, + "learning_rate": 1.2743792055196912e-05, + "loss": 2.2461, + "step": 436300 + }, + { + "epoch": 11.18, + "learning_rate": 1.2735252928920313e-05, + "loss": 2.4854, + "step": 436400 + }, + { + "epoch": 11.18, + "learning_rate": 1.2726713802643713e-05, + "loss": 2.3799, + "step": 436500 + }, + { + "epoch": 11.18, + "learning_rate": 1.2718174676367115e-05, + "loss": 2.4182, + "step": 436600 + }, + { + "epoch": 11.19, + "learning_rate": 1.2709635550090515e-05, + "loss": 2.3611, + "step": 436700 + }, + { + "epoch": 11.19, + "learning_rate": 1.2701096423813916e-05, + "loss": 2.3318, + "step": 436800 + }, + { + "epoch": 11.19, + "learning_rate": 1.2692557297537316e-05, + "loss": 2.3417, + "step": 436900 + }, + { + "epoch": 11.19, + "learning_rate": 1.2684018171260718e-05, + "loss": 2.5033, + "step": 437000 + }, + { + "epoch": 11.2, + "learning_rate": 1.2675479044984117e-05, + "loss": 2.371, + "step": 437100 + }, + { + "epoch": 11.2, + "learning_rate": 1.2666939918707519e-05, + "loss": 2.4119, + "step": 437200 + }, + { + "epoch": 11.2, + "learning_rate": 1.2658400792430919e-05, + "loss": 2.3906, + "step": 437300 + }, + { + "epoch": 11.21, + "learning_rate": 1.264986166615432e-05, + "loss": 2.3757, + "step": 437400 + }, + { + "epoch": 11.21, + "learning_rate": 1.264132253987772e-05, + "loss": 2.3541, + "step": 437500 + }, + { + "epoch": 11.21, + "learning_rate": 1.263278341360112e-05, + "loss": 2.3389, + "step": 437600 + }, + { + "epoch": 11.21, + "learning_rate": 1.2624244287324522e-05, + "loss": 2.4163, + "step": 437700 + }, + { + "epoch": 11.22, + "learning_rate": 1.2615705161047921e-05, + "loss": 2.2918, + "step": 437800 + }, + { + "epoch": 11.22, + "learning_rate": 1.2607166034771323e-05, + "loss": 2.4072, + "step": 437900 + }, + { + "epoch": 11.22, + "learning_rate": 1.2598626908494723e-05, + "loss": 2.2518, + "step": 438000 + }, + { + "epoch": 11.22, + "learning_rate": 1.2590087782218124e-05, + "loss": 2.4738, + "step": 438100 + }, + { + "epoch": 11.23, + "learning_rate": 1.2581548655941524e-05, + "loss": 2.2788, + "step": 438200 + }, + { + "epoch": 11.23, + "learning_rate": 1.2573009529664926e-05, + "loss": 2.3523, + "step": 438300 + }, + { + "epoch": 11.23, + "learning_rate": 1.2564470403388326e-05, + "loss": 2.5445, + "step": 438400 + }, + { + "epoch": 11.23, + "learning_rate": 1.2555931277111727e-05, + "loss": 2.391, + "step": 438500 + }, + { + "epoch": 11.24, + "learning_rate": 1.2547392150835127e-05, + "loss": 2.4425, + "step": 438600 + }, + { + "epoch": 11.24, + "learning_rate": 1.2538853024558529e-05, + "loss": 2.4095, + "step": 438700 + }, + { + "epoch": 11.24, + "learning_rate": 1.2530313898281928e-05, + "loss": 2.5371, + "step": 438800 + }, + { + "epoch": 11.24, + "learning_rate": 1.2521774772005328e-05, + "loss": 2.3896, + "step": 438900 + }, + { + "epoch": 11.25, + "learning_rate": 1.251323564572873e-05, + "loss": 2.4364, + "step": 439000 + }, + { + "epoch": 11.25, + "learning_rate": 1.250469651945213e-05, + "loss": 2.4755, + "step": 439100 + }, + { + "epoch": 11.25, + "learning_rate": 1.2496157393175531e-05, + "loss": 2.2854, + "step": 439200 + }, + { + "epoch": 11.25, + "learning_rate": 1.2487618266898931e-05, + "loss": 2.5707, + "step": 439300 + }, + { + "epoch": 11.26, + "learning_rate": 1.2479079140622333e-05, + "loss": 2.3527, + "step": 439400 + }, + { + "epoch": 11.26, + "learning_rate": 1.2470540014345733e-05, + "loss": 2.4184, + "step": 439500 + }, + { + "epoch": 11.26, + "learning_rate": 1.2462000888069134e-05, + "loss": 2.2633, + "step": 439600 + }, + { + "epoch": 11.26, + "learning_rate": 1.2453461761792534e-05, + "loss": 2.2966, + "step": 439700 + }, + { + "epoch": 11.27, + "learning_rate": 1.2444922635515936e-05, + "loss": 2.3354, + "step": 439800 + }, + { + "epoch": 11.27, + "learning_rate": 1.2436383509239335e-05, + "loss": 2.2734, + "step": 439900 + }, + { + "epoch": 11.27, + "learning_rate": 1.2427844382962737e-05, + "loss": 2.2729, + "step": 440000 + }, + { + "epoch": 11.27, + "learning_rate": 1.2419305256686137e-05, + "loss": 2.4983, + "step": 440100 + }, + { + "epoch": 11.28, + "learning_rate": 1.2410766130409537e-05, + "loss": 2.3772, + "step": 440200 + }, + { + "epoch": 11.28, + "learning_rate": 1.2402227004132938e-05, + "loss": 2.4128, + "step": 440300 + }, + { + "epoch": 11.28, + "learning_rate": 1.2393687877856338e-05, + "loss": 2.3746, + "step": 440400 + }, + { + "epoch": 11.28, + "learning_rate": 1.238514875157974e-05, + "loss": 2.4524, + "step": 440500 + }, + { + "epoch": 11.29, + "learning_rate": 1.237660962530314e-05, + "loss": 2.41, + "step": 440600 + }, + { + "epoch": 11.29, + "learning_rate": 1.2368070499026541e-05, + "loss": 2.3369, + "step": 440700 + }, + { + "epoch": 11.29, + "learning_rate": 1.235953137274994e-05, + "loss": 2.3432, + "step": 440800 + }, + { + "epoch": 11.29, + "learning_rate": 1.2350992246473342e-05, + "loss": 2.4829, + "step": 440900 + }, + { + "epoch": 11.3, + "learning_rate": 1.2342453120196742e-05, + "loss": 2.4246, + "step": 441000 + }, + { + "epoch": 11.3, + "learning_rate": 1.2333913993920144e-05, + "loss": 2.5314, + "step": 441100 + }, + { + "epoch": 11.3, + "learning_rate": 1.2325374867643544e-05, + "loss": 2.3748, + "step": 441200 + }, + { + "epoch": 11.3, + "learning_rate": 1.2316835741366945e-05, + "loss": 2.4952, + "step": 441300 + }, + { + "epoch": 11.31, + "learning_rate": 1.2308296615090345e-05, + "loss": 2.3488, + "step": 441400 + }, + { + "epoch": 11.31, + "learning_rate": 1.2299757488813745e-05, + "loss": 2.5501, + "step": 441500 + }, + { + "epoch": 11.31, + "learning_rate": 1.2291218362537146e-05, + "loss": 2.3405, + "step": 441600 + }, + { + "epoch": 11.32, + "learning_rate": 1.2282679236260546e-05, + "loss": 2.5885, + "step": 441700 + }, + { + "epoch": 11.32, + "learning_rate": 1.2274140109983948e-05, + "loss": 2.3693, + "step": 441800 + }, + { + "epoch": 11.32, + "learning_rate": 1.2265600983707348e-05, + "loss": 2.4954, + "step": 441900 + }, + { + "epoch": 11.32, + "learning_rate": 1.225706185743075e-05, + "loss": 2.4335, + "step": 442000 + }, + { + "epoch": 11.33, + "learning_rate": 1.2248522731154149e-05, + "loss": 2.3323, + "step": 442100 + }, + { + "epoch": 11.33, + "learning_rate": 1.223998360487755e-05, + "loss": 2.4281, + "step": 442200 + }, + { + "epoch": 11.33, + "learning_rate": 1.223144447860095e-05, + "loss": 2.4198, + "step": 442300 + }, + { + "epoch": 11.33, + "learning_rate": 1.2222905352324352e-05, + "loss": 2.515, + "step": 442400 + }, + { + "epoch": 11.34, + "learning_rate": 1.2214366226047752e-05, + "loss": 2.2983, + "step": 442500 + }, + { + "epoch": 11.34, + "learning_rate": 1.2205827099771153e-05, + "loss": 2.401, + "step": 442600 + }, + { + "epoch": 11.34, + "learning_rate": 1.2197287973494552e-05, + "loss": 2.3863, + "step": 442700 + }, + { + "epoch": 11.34, + "learning_rate": 1.2188748847217953e-05, + "loss": 2.3331, + "step": 442800 + }, + { + "epoch": 11.35, + "learning_rate": 1.2180209720941353e-05, + "loss": 2.4432, + "step": 442900 + }, + { + "epoch": 11.35, + "learning_rate": 1.2171670594664755e-05, + "loss": 2.4303, + "step": 443000 + }, + { + "epoch": 11.35, + "learning_rate": 1.2163131468388154e-05, + "loss": 2.3619, + "step": 443100 + }, + { + "epoch": 11.35, + "learning_rate": 1.2154592342111556e-05, + "loss": 2.18, + "step": 443200 + }, + { + "epoch": 11.36, + "learning_rate": 1.2146053215834956e-05, + "loss": 2.3913, + "step": 443300 + }, + { + "epoch": 11.36, + "learning_rate": 1.2137514089558356e-05, + "loss": 2.4747, + "step": 443400 + }, + { + "epoch": 11.36, + "learning_rate": 1.2128974963281757e-05, + "loss": 2.3488, + "step": 443500 + }, + { + "epoch": 11.36, + "learning_rate": 1.2120435837005157e-05, + "loss": 2.4871, + "step": 443600 + }, + { + "epoch": 11.37, + "learning_rate": 1.2111896710728559e-05, + "loss": 2.3957, + "step": 443700 + }, + { + "epoch": 11.37, + "learning_rate": 1.2103357584451958e-05, + "loss": 2.5369, + "step": 443800 + }, + { + "epoch": 11.37, + "learning_rate": 1.209481845817536e-05, + "loss": 2.4831, + "step": 443900 + }, + { + "epoch": 11.37, + "learning_rate": 1.208627933189876e-05, + "loss": 2.3505, + "step": 444000 + }, + { + "epoch": 11.38, + "learning_rate": 1.2077740205622161e-05, + "loss": 2.4901, + "step": 444100 + }, + { + "epoch": 11.38, + "learning_rate": 1.2069201079345561e-05, + "loss": 2.3502, + "step": 444200 + }, + { + "epoch": 11.38, + "learning_rate": 1.2060661953068963e-05, + "loss": 2.2579, + "step": 444300 + }, + { + "epoch": 11.38, + "learning_rate": 1.2052122826792363e-05, + "loss": 2.3426, + "step": 444400 + }, + { + "epoch": 11.39, + "learning_rate": 1.2043583700515764e-05, + "loss": 2.2187, + "step": 444500 + }, + { + "epoch": 11.39, + "learning_rate": 1.2035044574239164e-05, + "loss": 2.2769, + "step": 444600 + }, + { + "epoch": 11.39, + "learning_rate": 1.2026505447962564e-05, + "loss": 2.4459, + "step": 444700 + }, + { + "epoch": 11.39, + "learning_rate": 1.2017966321685965e-05, + "loss": 2.3774, + "step": 444800 + }, + { + "epoch": 11.4, + "learning_rate": 1.2009427195409365e-05, + "loss": 2.4176, + "step": 444900 + }, + { + "epoch": 11.4, + "learning_rate": 1.2000888069132767e-05, + "loss": 2.3299, + "step": 445000 + }, + { + "epoch": 11.4, + "learning_rate": 1.1992348942856167e-05, + "loss": 2.2312, + "step": 445100 + }, + { + "epoch": 11.4, + "learning_rate": 1.1983809816579568e-05, + "loss": 2.3771, + "step": 445200 + }, + { + "epoch": 11.41, + "learning_rate": 1.1975270690302968e-05, + "loss": 2.3815, + "step": 445300 + }, + { + "epoch": 11.41, + "learning_rate": 1.196673156402637e-05, + "loss": 2.3026, + "step": 445400 + }, + { + "epoch": 11.41, + "learning_rate": 1.195819243774977e-05, + "loss": 2.486, + "step": 445500 + }, + { + "epoch": 11.42, + "learning_rate": 1.1949653311473171e-05, + "loss": 2.4579, + "step": 445600 + }, + { + "epoch": 11.42, + "learning_rate": 1.1941114185196571e-05, + "loss": 2.298, + "step": 445700 + }, + { + "epoch": 11.42, + "learning_rate": 1.1932575058919972e-05, + "loss": 2.2979, + "step": 445800 + }, + { + "epoch": 11.42, + "learning_rate": 1.1924035932643372e-05, + "loss": 2.5232, + "step": 445900 + }, + { + "epoch": 11.43, + "learning_rate": 1.1915496806366772e-05, + "loss": 2.289, + "step": 446000 + }, + { + "epoch": 11.43, + "learning_rate": 1.1906957680090174e-05, + "loss": 2.2656, + "step": 446100 + }, + { + "epoch": 11.43, + "learning_rate": 1.1898418553813574e-05, + "loss": 2.3435, + "step": 446200 + }, + { + "epoch": 11.43, + "learning_rate": 1.1889879427536975e-05, + "loss": 2.3753, + "step": 446300 + }, + { + "epoch": 11.44, + "learning_rate": 1.1881340301260375e-05, + "loss": 2.4717, + "step": 446400 + }, + { + "epoch": 11.44, + "learning_rate": 1.1872801174983777e-05, + "loss": 2.4375, + "step": 446500 + }, + { + "epoch": 11.44, + "learning_rate": 1.1864262048707176e-05, + "loss": 2.3796, + "step": 446600 + }, + { + "epoch": 11.44, + "learning_rate": 1.1855722922430578e-05, + "loss": 2.3647, + "step": 446700 + }, + { + "epoch": 11.45, + "learning_rate": 1.1847183796153978e-05, + "loss": 2.5385, + "step": 446800 + }, + { + "epoch": 11.45, + "learning_rate": 1.183864466987738e-05, + "loss": 2.3397, + "step": 446900 + }, + { + "epoch": 11.45, + "learning_rate": 1.183010554360078e-05, + "loss": 2.3079, + "step": 447000 + }, + { + "epoch": 11.45, + "learning_rate": 1.182156641732418e-05, + "loss": 2.4263, + "step": 447100 + }, + { + "epoch": 11.46, + "learning_rate": 1.181302729104758e-05, + "loss": 2.3297, + "step": 447200 + }, + { + "epoch": 11.46, + "learning_rate": 1.180448816477098e-05, + "loss": 2.2843, + "step": 447300 + }, + { + "epoch": 11.46, + "learning_rate": 1.1795949038494382e-05, + "loss": 2.3753, + "step": 447400 + }, + { + "epoch": 11.46, + "learning_rate": 1.1787409912217782e-05, + "loss": 2.448, + "step": 447500 + }, + { + "epoch": 11.47, + "learning_rate": 1.1778870785941183e-05, + "loss": 2.2821, + "step": 447600 + }, + { + "epoch": 11.47, + "learning_rate": 1.1770331659664583e-05, + "loss": 2.4746, + "step": 447700 + }, + { + "epoch": 11.47, + "learning_rate": 1.1761792533387985e-05, + "loss": 2.4732, + "step": 447800 + }, + { + "epoch": 11.47, + "learning_rate": 1.1753253407111385e-05, + "loss": 2.3822, + "step": 447900 + }, + { + "epoch": 11.48, + "learning_rate": 1.1744714280834786e-05, + "loss": 2.3055, + "step": 448000 + }, + { + "epoch": 11.48, + "learning_rate": 1.1736175154558186e-05, + "loss": 2.2832, + "step": 448100 + }, + { + "epoch": 11.48, + "learning_rate": 1.1727636028281588e-05, + "loss": 2.4172, + "step": 448200 + }, + { + "epoch": 11.48, + "learning_rate": 1.1719096902004987e-05, + "loss": 2.5879, + "step": 448300 + }, + { + "epoch": 11.49, + "learning_rate": 1.1710557775728389e-05, + "loss": 2.3739, + "step": 448400 + }, + { + "epoch": 11.49, + "learning_rate": 1.1702018649451789e-05, + "loss": 2.412, + "step": 448500 + }, + { + "epoch": 11.49, + "learning_rate": 1.1693479523175189e-05, + "loss": 2.2493, + "step": 448600 + }, + { + "epoch": 11.49, + "learning_rate": 1.168494039689859e-05, + "loss": 2.5437, + "step": 448700 + }, + { + "epoch": 11.5, + "learning_rate": 1.167640127062199e-05, + "loss": 2.4373, + "step": 448800 + }, + { + "epoch": 11.5, + "learning_rate": 1.1667862144345392e-05, + "loss": 2.3772, + "step": 448900 + }, + { + "epoch": 11.5, + "learning_rate": 1.1659323018068792e-05, + "loss": 2.491, + "step": 449000 + }, + { + "epoch": 11.5, + "learning_rate": 1.1650783891792193e-05, + "loss": 2.3499, + "step": 449100 + }, + { + "epoch": 11.51, + "learning_rate": 1.1642244765515593e-05, + "loss": 2.4852, + "step": 449200 + }, + { + "epoch": 11.51, + "learning_rate": 1.1633705639238994e-05, + "loss": 2.2343, + "step": 449300 + }, + { + "epoch": 11.51, + "learning_rate": 1.1625166512962394e-05, + "loss": 2.2328, + "step": 449400 + }, + { + "epoch": 11.52, + "learning_rate": 1.1616627386685796e-05, + "loss": 2.293, + "step": 449500 + }, + { + "epoch": 11.52, + "learning_rate": 1.1608088260409196e-05, + "loss": 2.2728, + "step": 449600 + }, + { + "epoch": 11.52, + "learning_rate": 1.1599549134132597e-05, + "loss": 2.3773, + "step": 449700 + }, + { + "epoch": 11.52, + "learning_rate": 1.1591010007855997e-05, + "loss": 2.5609, + "step": 449800 + }, + { + "epoch": 11.53, + "learning_rate": 1.1582470881579397e-05, + "loss": 2.3769, + "step": 449900 + }, + { + "epoch": 11.53, + "learning_rate": 1.1573931755302799e-05, + "loss": 2.4467, + "step": 450000 + }, + { + "epoch": 11.53, + "learning_rate": 1.1565392629026198e-05, + "loss": 2.4738, + "step": 450100 + }, + { + "epoch": 11.53, + "learning_rate": 1.15568535027496e-05, + "loss": 2.3608, + "step": 450200 + }, + { + "epoch": 11.54, + "learning_rate": 1.1548314376473e-05, + "loss": 2.307, + "step": 450300 + }, + { + "epoch": 11.54, + "learning_rate": 1.1539775250196401e-05, + "loss": 2.386, + "step": 450400 + }, + { + "epoch": 11.54, + "learning_rate": 1.1531236123919801e-05, + "loss": 2.5637, + "step": 450500 + }, + { + "epoch": 11.54, + "learning_rate": 1.1522696997643203e-05, + "loss": 2.3824, + "step": 450600 + }, + { + "epoch": 11.55, + "learning_rate": 1.1514157871366603e-05, + "loss": 2.4299, + "step": 450700 + }, + { + "epoch": 11.55, + "learning_rate": 1.1505618745090004e-05, + "loss": 2.359, + "step": 450800 + }, + { + "epoch": 11.55, + "learning_rate": 1.1497079618813404e-05, + "loss": 2.4056, + "step": 450900 + }, + { + "epoch": 11.55, + "learning_rate": 1.1488540492536806e-05, + "loss": 2.4729, + "step": 451000 + }, + { + "epoch": 11.56, + "learning_rate": 1.1480001366260205e-05, + "loss": 2.4801, + "step": 451100 + }, + { + "epoch": 11.56, + "learning_rate": 1.1471462239983605e-05, + "loss": 2.4627, + "step": 451200 + }, + { + "epoch": 11.56, + "learning_rate": 1.1462923113707007e-05, + "loss": 2.2652, + "step": 451300 + }, + { + "epoch": 11.56, + "learning_rate": 1.1454383987430407e-05, + "loss": 2.3068, + "step": 451400 + }, + { + "epoch": 11.57, + "learning_rate": 1.1445844861153807e-05, + "loss": 2.4512, + "step": 451500 + }, + { + "epoch": 11.57, + "learning_rate": 1.1437305734877208e-05, + "loss": 2.382, + "step": 451600 + }, + { + "epoch": 11.57, + "learning_rate": 1.1428766608600608e-05, + "loss": 2.2647, + "step": 451700 + }, + { + "epoch": 11.57, + "learning_rate": 1.1420227482324008e-05, + "loss": 2.3449, + "step": 451800 + }, + { + "epoch": 11.58, + "learning_rate": 1.141168835604741e-05, + "loss": 2.3118, + "step": 451900 + }, + { + "epoch": 11.58, + "learning_rate": 1.140314922977081e-05, + "loss": 2.4311, + "step": 452000 + }, + { + "epoch": 11.58, + "learning_rate": 1.139461010349421e-05, + "loss": 2.2573, + "step": 452100 + }, + { + "epoch": 11.58, + "learning_rate": 1.138607097721761e-05, + "loss": 2.2957, + "step": 452200 + }, + { + "epoch": 11.59, + "learning_rate": 1.1377531850941012e-05, + "loss": 2.4046, + "step": 452300 + }, + { + "epoch": 11.59, + "learning_rate": 1.1368992724664412e-05, + "loss": 2.3726, + "step": 452400 + }, + { + "epoch": 11.59, + "learning_rate": 1.1360453598387814e-05, + "loss": 2.2297, + "step": 452500 + }, + { + "epoch": 11.59, + "learning_rate": 1.1351914472111213e-05, + "loss": 2.4903, + "step": 452600 + }, + { + "epoch": 11.6, + "learning_rate": 1.1343375345834615e-05, + "loss": 2.2608, + "step": 452700 + }, + { + "epoch": 11.6, + "learning_rate": 1.1334836219558015e-05, + "loss": 2.2555, + "step": 452800 + }, + { + "epoch": 11.6, + "learning_rate": 1.1326297093281416e-05, + "loss": 2.4198, + "step": 452900 + }, + { + "epoch": 11.6, + "learning_rate": 1.1317757967004816e-05, + "loss": 2.2123, + "step": 453000 + }, + { + "epoch": 11.61, + "learning_rate": 1.1309218840728216e-05, + "loss": 2.4742, + "step": 453100 + }, + { + "epoch": 11.61, + "learning_rate": 1.1300679714451618e-05, + "loss": 2.413, + "step": 453200 + }, + { + "epoch": 11.61, + "learning_rate": 1.1292140588175017e-05, + "loss": 2.5419, + "step": 453300 + }, + { + "epoch": 11.61, + "learning_rate": 1.1283601461898419e-05, + "loss": 2.3603, + "step": 453400 + }, + { + "epoch": 11.62, + "learning_rate": 1.1275062335621819e-05, + "loss": 2.5009, + "step": 453500 + }, + { + "epoch": 11.62, + "learning_rate": 1.126652320934522e-05, + "loss": 2.3219, + "step": 453600 + }, + { + "epoch": 11.62, + "learning_rate": 1.125798408306862e-05, + "loss": 2.2979, + "step": 453700 + }, + { + "epoch": 11.63, + "learning_rate": 1.1249444956792022e-05, + "loss": 2.3667, + "step": 453800 + }, + { + "epoch": 11.63, + "learning_rate": 1.1240905830515422e-05, + "loss": 2.4106, + "step": 453900 + }, + { + "epoch": 11.63, + "learning_rate": 1.1232366704238823e-05, + "loss": 2.4102, + "step": 454000 + }, + { + "epoch": 11.63, + "learning_rate": 1.1223827577962223e-05, + "loss": 2.5527, + "step": 454100 + }, + { + "epoch": 11.64, + "learning_rate": 1.1215288451685625e-05, + "loss": 2.2172, + "step": 454200 + }, + { + "epoch": 11.64, + "learning_rate": 1.1206749325409024e-05, + "loss": 2.5036, + "step": 454300 + }, + { + "epoch": 11.64, + "learning_rate": 1.1198210199132424e-05, + "loss": 2.4123, + "step": 454400 + }, + { + "epoch": 11.64, + "learning_rate": 1.1189671072855826e-05, + "loss": 2.5316, + "step": 454500 + }, + { + "epoch": 11.65, + "learning_rate": 1.1181131946579226e-05, + "loss": 2.126, + "step": 454600 + }, + { + "epoch": 11.65, + "learning_rate": 1.1172592820302627e-05, + "loss": 2.4036, + "step": 454700 + }, + { + "epoch": 11.65, + "learning_rate": 1.1164053694026027e-05, + "loss": 2.3227, + "step": 454800 + }, + { + "epoch": 11.65, + "learning_rate": 1.1155514567749429e-05, + "loss": 2.5125, + "step": 454900 + }, + { + "epoch": 11.66, + "learning_rate": 1.1146975441472829e-05, + "loss": 2.3822, + "step": 455000 + }, + { + "epoch": 11.66, + "learning_rate": 1.113843631519623e-05, + "loss": 2.2891, + "step": 455100 + }, + { + "epoch": 11.66, + "learning_rate": 1.112989718891963e-05, + "loss": 2.3755, + "step": 455200 + }, + { + "epoch": 11.66, + "learning_rate": 1.1121358062643031e-05, + "loss": 2.3895, + "step": 455300 + }, + { + "epoch": 11.67, + "learning_rate": 1.1112818936366431e-05, + "loss": 2.4506, + "step": 455400 + }, + { + "epoch": 11.67, + "learning_rate": 1.1104279810089833e-05, + "loss": 2.3326, + "step": 455500 + }, + { + "epoch": 11.67, + "learning_rate": 1.1095740683813233e-05, + "loss": 2.2889, + "step": 455600 + }, + { + "epoch": 11.67, + "learning_rate": 1.1087201557536633e-05, + "loss": 2.3132, + "step": 455700 + }, + { + "epoch": 11.68, + "learning_rate": 1.1078662431260034e-05, + "loss": 2.5263, + "step": 455800 + }, + { + "epoch": 11.68, + "learning_rate": 1.1070123304983434e-05, + "loss": 2.2508, + "step": 455900 + }, + { + "epoch": 11.68, + "learning_rate": 1.1061584178706836e-05, + "loss": 2.2724, + "step": 456000 + }, + { + "epoch": 11.68, + "learning_rate": 1.1053045052430235e-05, + "loss": 2.3963, + "step": 456100 + }, + { + "epoch": 11.69, + "learning_rate": 1.1044505926153637e-05, + "loss": 2.354, + "step": 456200 + }, + { + "epoch": 11.69, + "learning_rate": 1.1035966799877037e-05, + "loss": 2.3623, + "step": 456300 + }, + { + "epoch": 11.69, + "learning_rate": 1.1027427673600438e-05, + "loss": 2.3601, + "step": 456400 + }, + { + "epoch": 11.69, + "learning_rate": 1.1018888547323838e-05, + "loss": 2.3038, + "step": 456500 + }, + { + "epoch": 11.7, + "learning_rate": 1.101034942104724e-05, + "loss": 2.3352, + "step": 456600 + }, + { + "epoch": 11.7, + "learning_rate": 1.100181029477064e-05, + "loss": 2.4089, + "step": 456700 + }, + { + "epoch": 11.7, + "learning_rate": 1.0993271168494041e-05, + "loss": 2.3569, + "step": 456800 + }, + { + "epoch": 11.7, + "learning_rate": 1.0984732042217441e-05, + "loss": 2.3535, + "step": 456900 + }, + { + "epoch": 11.71, + "learning_rate": 1.0976192915940841e-05, + "loss": 2.3747, + "step": 457000 + }, + { + "epoch": 11.71, + "learning_rate": 1.0967653789664242e-05, + "loss": 2.4065, + "step": 457100 + }, + { + "epoch": 11.71, + "learning_rate": 1.0959114663387642e-05, + "loss": 2.524, + "step": 457200 + }, + { + "epoch": 11.71, + "learning_rate": 1.0950575537111044e-05, + "loss": 2.3989, + "step": 457300 + }, + { + "epoch": 11.72, + "learning_rate": 1.0942036410834444e-05, + "loss": 2.3519, + "step": 457400 + }, + { + "epoch": 11.72, + "learning_rate": 1.0933497284557845e-05, + "loss": 2.4087, + "step": 457500 + }, + { + "epoch": 11.72, + "learning_rate": 1.0924958158281245e-05, + "loss": 2.3264, + "step": 457600 + }, + { + "epoch": 11.73, + "learning_rate": 1.0916419032004647e-05, + "loss": 2.223, + "step": 457700 + }, + { + "epoch": 11.73, + "learning_rate": 1.0907879905728046e-05, + "loss": 2.3941, + "step": 457800 + }, + { + "epoch": 11.73, + "learning_rate": 1.0899340779451448e-05, + "loss": 2.2319, + "step": 457900 + }, + { + "epoch": 11.73, + "learning_rate": 1.0890801653174848e-05, + "loss": 2.2449, + "step": 458000 + }, + { + "epoch": 11.74, + "learning_rate": 1.088226252689825e-05, + "loss": 2.316, + "step": 458100 + }, + { + "epoch": 11.74, + "learning_rate": 1.087372340062165e-05, + "loss": 2.4809, + "step": 458200 + }, + { + "epoch": 11.74, + "learning_rate": 1.0865184274345049e-05, + "loss": 2.2141, + "step": 458300 + }, + { + "epoch": 11.74, + "learning_rate": 1.085664514806845e-05, + "loss": 2.2298, + "step": 458400 + }, + { + "epoch": 11.75, + "learning_rate": 1.084810602179185e-05, + "loss": 2.1899, + "step": 458500 + }, + { + "epoch": 11.75, + "learning_rate": 1.0839566895515252e-05, + "loss": 2.347, + "step": 458600 + }, + { + "epoch": 11.75, + "learning_rate": 1.0831027769238652e-05, + "loss": 2.489, + "step": 458700 + }, + { + "epoch": 11.75, + "learning_rate": 1.0822488642962053e-05, + "loss": 2.2976, + "step": 458800 + }, + { + "epoch": 11.76, + "learning_rate": 1.0813949516685453e-05, + "loss": 2.3948, + "step": 458900 + }, + { + "epoch": 11.76, + "learning_rate": 1.0805410390408855e-05, + "loss": 2.3881, + "step": 459000 + }, + { + "epoch": 11.76, + "learning_rate": 1.0796871264132255e-05, + "loss": 2.2191, + "step": 459100 + }, + { + "epoch": 11.76, + "learning_rate": 1.0788332137855656e-05, + "loss": 2.3434, + "step": 459200 + }, + { + "epoch": 11.77, + "learning_rate": 1.0779793011579056e-05, + "loss": 2.4501, + "step": 459300 + }, + { + "epoch": 11.77, + "learning_rate": 1.0771253885302458e-05, + "loss": 2.3837, + "step": 459400 + }, + { + "epoch": 11.77, + "learning_rate": 1.0762714759025858e-05, + "loss": 2.3998, + "step": 459500 + }, + { + "epoch": 11.77, + "learning_rate": 1.0754175632749257e-05, + "loss": 2.3488, + "step": 459600 + }, + { + "epoch": 11.78, + "learning_rate": 1.0745636506472659e-05, + "loss": 2.3771, + "step": 459700 + }, + { + "epoch": 11.78, + "learning_rate": 1.0737097380196059e-05, + "loss": 2.4188, + "step": 459800 + }, + { + "epoch": 11.78, + "learning_rate": 1.072855825391946e-05, + "loss": 2.4259, + "step": 459900 + }, + { + "epoch": 11.78, + "learning_rate": 1.072001912764286e-05, + "loss": 2.4023, + "step": 460000 + }, + { + "epoch": 11.79, + "learning_rate": 1.0711480001366262e-05, + "loss": 2.4128, + "step": 460100 + }, + { + "epoch": 11.79, + "learning_rate": 1.070294087508966e-05, + "loss": 2.3, + "step": 460200 + }, + { + "epoch": 11.79, + "learning_rate": 1.0694401748813061e-05, + "loss": 2.2961, + "step": 460300 + }, + { + "epoch": 11.79, + "learning_rate": 1.0685862622536461e-05, + "loss": 2.2691, + "step": 460400 + }, + { + "epoch": 11.8, + "learning_rate": 1.0677323496259863e-05, + "loss": 2.3439, + "step": 460500 + }, + { + "epoch": 11.8, + "learning_rate": 1.0668784369983263e-05, + "loss": 2.2766, + "step": 460600 + }, + { + "epoch": 11.8, + "learning_rate": 1.0660245243706664e-05, + "loss": 2.2719, + "step": 460700 + }, + { + "epoch": 11.8, + "learning_rate": 1.0651706117430064e-05, + "loss": 2.4551, + "step": 460800 + }, + { + "epoch": 11.81, + "learning_rate": 1.0643166991153466e-05, + "loss": 2.3891, + "step": 460900 + }, + { + "epoch": 11.81, + "learning_rate": 1.0634627864876866e-05, + "loss": 2.186, + "step": 461000 + }, + { + "epoch": 11.81, + "learning_rate": 1.0626088738600267e-05, + "loss": 2.4296, + "step": 461100 + }, + { + "epoch": 11.81, + "learning_rate": 1.0617549612323667e-05, + "loss": 2.646, + "step": 461200 + }, + { + "epoch": 11.82, + "learning_rate": 1.0609010486047068e-05, + "loss": 2.2722, + "step": 461300 + }, + { + "epoch": 11.82, + "learning_rate": 1.0600471359770468e-05, + "loss": 2.3699, + "step": 461400 + }, + { + "epoch": 11.82, + "learning_rate": 1.0591932233493868e-05, + "loss": 2.5427, + "step": 461500 + }, + { + "epoch": 11.82, + "learning_rate": 1.058339310721727e-05, + "loss": 2.4104, + "step": 461600 + }, + { + "epoch": 11.83, + "learning_rate": 1.057485398094067e-05, + "loss": 2.2132, + "step": 461700 + }, + { + "epoch": 11.83, + "learning_rate": 1.0566314854664071e-05, + "loss": 2.2677, + "step": 461800 + }, + { + "epoch": 11.83, + "learning_rate": 1.0557775728387471e-05, + "loss": 2.5672, + "step": 461900 + }, + { + "epoch": 11.84, + "learning_rate": 1.0549236602110873e-05, + "loss": 2.3104, + "step": 462000 + }, + { + "epoch": 11.84, + "learning_rate": 1.0540697475834272e-05, + "loss": 2.3313, + "step": 462100 + }, + { + "epoch": 11.84, + "learning_rate": 1.0532158349557674e-05, + "loss": 2.2646, + "step": 462200 + }, + { + "epoch": 11.84, + "learning_rate": 1.0523619223281074e-05, + "loss": 2.3769, + "step": 462300 + }, + { + "epoch": 11.85, + "learning_rate": 1.0515080097004475e-05, + "loss": 2.4205, + "step": 462400 + }, + { + "epoch": 11.85, + "learning_rate": 1.0506540970727875e-05, + "loss": 2.4462, + "step": 462500 + }, + { + "epoch": 11.85, + "learning_rate": 1.0498001844451277e-05, + "loss": 2.3421, + "step": 462600 + }, + { + "epoch": 11.85, + "learning_rate": 1.0489462718174677e-05, + "loss": 2.3728, + "step": 462700 + }, + { + "epoch": 11.86, + "learning_rate": 1.0480923591898076e-05, + "loss": 2.2476, + "step": 462800 + }, + { + "epoch": 11.86, + "learning_rate": 1.0472384465621478e-05, + "loss": 2.2497, + "step": 462900 + }, + { + "epoch": 11.86, + "learning_rate": 1.0463845339344878e-05, + "loss": 2.5004, + "step": 463000 + }, + { + "epoch": 11.86, + "learning_rate": 1.045530621306828e-05, + "loss": 2.323, + "step": 463100 + }, + { + "epoch": 11.87, + "learning_rate": 1.044676708679168e-05, + "loss": 2.2561, + "step": 463200 + }, + { + "epoch": 11.87, + "learning_rate": 1.043822796051508e-05, + "loss": 2.4128, + "step": 463300 + }, + { + "epoch": 11.87, + "learning_rate": 1.042968883423848e-05, + "loss": 2.1714, + "step": 463400 + }, + { + "epoch": 11.87, + "learning_rate": 1.0421149707961882e-05, + "loss": 2.475, + "step": 463500 + }, + { + "epoch": 11.88, + "learning_rate": 1.0412610581685282e-05, + "loss": 2.2916, + "step": 463600 + }, + { + "epoch": 11.88, + "learning_rate": 1.0404071455408684e-05, + "loss": 2.1948, + "step": 463700 + }, + { + "epoch": 11.88, + "learning_rate": 1.0395532329132083e-05, + "loss": 2.4248, + "step": 463800 + }, + { + "epoch": 11.88, + "learning_rate": 1.0386993202855485e-05, + "loss": 2.5744, + "step": 463900 + }, + { + "epoch": 11.89, + "learning_rate": 1.0378454076578885e-05, + "loss": 2.2751, + "step": 464000 + }, + { + "epoch": 11.89, + "learning_rate": 1.0369914950302285e-05, + "loss": 2.5041, + "step": 464100 + }, + { + "epoch": 11.89, + "learning_rate": 1.0361375824025686e-05, + "loss": 2.4111, + "step": 464200 + }, + { + "epoch": 11.89, + "learning_rate": 1.0352836697749086e-05, + "loss": 2.3358, + "step": 464300 + }, + { + "epoch": 11.9, + "learning_rate": 1.0344297571472488e-05, + "loss": 2.3797, + "step": 464400 + }, + { + "epoch": 11.9, + "learning_rate": 1.0335758445195888e-05, + "loss": 2.3143, + "step": 464500 + }, + { + "epoch": 11.9, + "learning_rate": 1.0327219318919289e-05, + "loss": 2.4891, + "step": 464600 + }, + { + "epoch": 11.9, + "learning_rate": 1.0318680192642689e-05, + "loss": 2.315, + "step": 464700 + }, + { + "epoch": 11.91, + "learning_rate": 1.031014106636609e-05, + "loss": 2.3748, + "step": 464800 + }, + { + "epoch": 11.91, + "learning_rate": 1.030160194008949e-05, + "loss": 2.3514, + "step": 464900 + }, + { + "epoch": 11.91, + "learning_rate": 1.0293062813812892e-05, + "loss": 2.3933, + "step": 465000 + }, + { + "epoch": 11.91, + "learning_rate": 1.0284523687536292e-05, + "loss": 2.5657, + "step": 465100 + }, + { + "epoch": 11.92, + "learning_rate": 1.0275984561259693e-05, + "loss": 2.3748, + "step": 465200 + }, + { + "epoch": 11.92, + "learning_rate": 1.0267445434983093e-05, + "loss": 2.3294, + "step": 465300 + }, + { + "epoch": 11.92, + "learning_rate": 1.0258906308706493e-05, + "loss": 2.3038, + "step": 465400 + }, + { + "epoch": 11.92, + "learning_rate": 1.0250367182429895e-05, + "loss": 2.3801, + "step": 465500 + }, + { + "epoch": 11.93, + "learning_rate": 1.0241828056153294e-05, + "loss": 2.2168, + "step": 465600 + }, + { + "epoch": 11.93, + "learning_rate": 1.0233288929876696e-05, + "loss": 2.2804, + "step": 465700 + }, + { + "epoch": 11.93, + "learning_rate": 1.0224749803600096e-05, + "loss": 2.3886, + "step": 465800 + }, + { + "epoch": 11.94, + "learning_rate": 1.0216210677323497e-05, + "loss": 2.5101, + "step": 465900 + }, + { + "epoch": 11.94, + "learning_rate": 1.0207671551046897e-05, + "loss": 2.477, + "step": 466000 + }, + { + "epoch": 11.94, + "learning_rate": 1.0199132424770299e-05, + "loss": 2.2089, + "step": 466100 + }, + { + "epoch": 11.94, + "learning_rate": 1.0190593298493699e-05, + "loss": 2.4535, + "step": 466200 + }, + { + "epoch": 11.95, + "learning_rate": 1.01820541722171e-05, + "loss": 2.2373, + "step": 466300 + }, + { + "epoch": 11.95, + "learning_rate": 1.01735150459405e-05, + "loss": 2.3935, + "step": 466400 + }, + { + "epoch": 11.95, + "learning_rate": 1.0164975919663902e-05, + "loss": 2.3327, + "step": 466500 + }, + { + "epoch": 11.95, + "learning_rate": 1.0156436793387301e-05, + "loss": 2.3853, + "step": 466600 + }, + { + "epoch": 11.96, + "learning_rate": 1.0147897667110701e-05, + "loss": 2.2725, + "step": 466700 + }, + { + "epoch": 11.96, + "learning_rate": 1.0139358540834103e-05, + "loss": 2.3891, + "step": 466800 + }, + { + "epoch": 11.96, + "learning_rate": 1.0130819414557503e-05, + "loss": 2.3847, + "step": 466900 + }, + { + "epoch": 11.96, + "learning_rate": 1.0122280288280904e-05, + "loss": 2.6729, + "step": 467000 + }, + { + "epoch": 11.97, + "learning_rate": 1.0113741162004304e-05, + "loss": 2.4303, + "step": 467100 + }, + { + "epoch": 11.97, + "learning_rate": 1.0105202035727706e-05, + "loss": 2.6356, + "step": 467200 + }, + { + "epoch": 11.97, + "learning_rate": 1.0096662909451105e-05, + "loss": 2.3694, + "step": 467300 + }, + { + "epoch": 11.97, + "learning_rate": 1.0088123783174507e-05, + "loss": 2.2721, + "step": 467400 + }, + { + "epoch": 11.98, + "learning_rate": 1.0079584656897907e-05, + "loss": 2.4132, + "step": 467500 + }, + { + "epoch": 11.98, + "learning_rate": 1.0071045530621308e-05, + "loss": 2.3676, + "step": 467600 + }, + { + "epoch": 11.98, + "learning_rate": 1.0062506404344708e-05, + "loss": 2.2973, + "step": 467700 + }, + { + "epoch": 11.98, + "learning_rate": 1.005396727806811e-05, + "loss": 2.2985, + "step": 467800 + }, + { + "epoch": 11.99, + "learning_rate": 1.004542815179151e-05, + "loss": 2.3777, + "step": 467900 + }, + { + "epoch": 11.99, + "learning_rate": 1.003688902551491e-05, + "loss": 2.3674, + "step": 468000 + }, + { + "epoch": 11.99, + "learning_rate": 1.0028349899238311e-05, + "loss": 2.3113, + "step": 468100 + }, + { + "epoch": 11.99, + "learning_rate": 1.0019810772961711e-05, + "loss": 2.3669, + "step": 468200 + }, + { + "epoch": 12.0, + "learning_rate": 1.0011271646685112e-05, + "loss": 2.363, + "step": 468300 + }, + { + "epoch": 12.0, + "learning_rate": 1.0002732520408512e-05, + "loss": 2.3587, + "step": 468400 + }, + { + "epoch": 12.0, + "learning_rate": 9.994193394131914e-06, + "loss": 2.2949, + "step": 468500 + }, + { + "epoch": 12.0, + "learning_rate": 9.985654267855314e-06, + "loss": 2.3748, + "step": 468600 + }, + { + "epoch": 12.01, + "learning_rate": 9.977115141578715e-06, + "loss": 2.3995, + "step": 468700 + }, + { + "epoch": 12.01, + "learning_rate": 9.968576015302115e-06, + "loss": 2.3465, + "step": 468800 + }, + { + "epoch": 12.01, + "learning_rate": 9.960036889025517e-06, + "loss": 2.2719, + "step": 468900 + }, + { + "epoch": 12.01, + "learning_rate": 9.951497762748915e-06, + "loss": 2.1681, + "step": 469000 + }, + { + "epoch": 12.02, + "learning_rate": 9.942958636472316e-06, + "loss": 2.2478, + "step": 469100 + }, + { + "epoch": 12.02, + "learning_rate": 9.934419510195716e-06, + "loss": 2.3767, + "step": 469200 + }, + { + "epoch": 12.02, + "learning_rate": 9.925880383919118e-06, + "loss": 2.3495, + "step": 469300 + }, + { + "epoch": 12.02, + "learning_rate": 9.917341257642518e-06, + "loss": 2.4604, + "step": 469400 + }, + { + "epoch": 12.03, + "learning_rate": 9.90880213136592e-06, + "loss": 2.281, + "step": 469500 + }, + { + "epoch": 12.03, + "learning_rate": 9.900263005089319e-06, + "loss": 2.2163, + "step": 469600 + }, + { + "epoch": 12.03, + "learning_rate": 9.89172387881272e-06, + "loss": 2.2095, + "step": 469700 + }, + { + "epoch": 12.04, + "learning_rate": 9.88318475253612e-06, + "loss": 2.3108, + "step": 469800 + }, + { + "epoch": 12.04, + "learning_rate": 9.87464562625952e-06, + "loss": 2.2822, + "step": 469900 + }, + { + "epoch": 12.04, + "learning_rate": 9.866106499982922e-06, + "loss": 2.4625, + "step": 470000 + }, + { + "epoch": 12.04, + "learning_rate": 9.857567373706322e-06, + "loss": 2.291, + "step": 470100 + }, + { + "epoch": 12.05, + "learning_rate": 9.849028247429723e-06, + "loss": 2.3437, + "step": 470200 + }, + { + "epoch": 12.05, + "learning_rate": 9.840489121153123e-06, + "loss": 2.3757, + "step": 470300 + }, + { + "epoch": 12.05, + "learning_rate": 9.831949994876525e-06, + "loss": 2.4156, + "step": 470400 + }, + { + "epoch": 12.05, + "learning_rate": 9.823410868599925e-06, + "loss": 2.3461, + "step": 470500 + }, + { + "epoch": 12.06, + "learning_rate": 9.814871742323326e-06, + "loss": 2.303, + "step": 470600 + }, + { + "epoch": 12.06, + "learning_rate": 9.806332616046726e-06, + "loss": 2.3965, + "step": 470700 + }, + { + "epoch": 12.06, + "learning_rate": 9.797793489770127e-06, + "loss": 2.4787, + "step": 470800 + }, + { + "epoch": 12.06, + "learning_rate": 9.789254363493527e-06, + "loss": 2.2388, + "step": 470900 + }, + { + "epoch": 12.07, + "learning_rate": 9.780715237216929e-06, + "loss": 2.1807, + "step": 471000 + }, + { + "epoch": 12.07, + "learning_rate": 9.772176110940329e-06, + "loss": 2.3898, + "step": 471100 + }, + { + "epoch": 12.07, + "learning_rate": 9.763636984663729e-06, + "loss": 2.3237, + "step": 471200 + }, + { + "epoch": 12.07, + "learning_rate": 9.75509785838713e-06, + "loss": 2.3549, + "step": 471300 + }, + { + "epoch": 12.08, + "learning_rate": 9.74655873211053e-06, + "loss": 2.4313, + "step": 471400 + }, + { + "epoch": 12.08, + "learning_rate": 9.738019605833932e-06, + "loss": 2.3672, + "step": 471500 + }, + { + "epoch": 12.08, + "learning_rate": 9.729480479557331e-06, + "loss": 2.4768, + "step": 471600 + }, + { + "epoch": 12.08, + "learning_rate": 9.720941353280733e-06, + "loss": 2.4814, + "step": 471700 + }, + { + "epoch": 12.09, + "learning_rate": 9.712402227004133e-06, + "loss": 2.2807, + "step": 471800 + }, + { + "epoch": 12.09, + "learning_rate": 9.703863100727534e-06, + "loss": 2.4814, + "step": 471900 + }, + { + "epoch": 12.09, + "learning_rate": 9.695323974450934e-06, + "loss": 2.1597, + "step": 472000 + }, + { + "epoch": 12.09, + "learning_rate": 9.686784848174336e-06, + "loss": 2.5317, + "step": 472100 + }, + { + "epoch": 12.1, + "learning_rate": 9.678245721897736e-06, + "loss": 2.3068, + "step": 472200 + }, + { + "epoch": 12.1, + "learning_rate": 9.669706595621137e-06, + "loss": 2.5735, + "step": 472300 + }, + { + "epoch": 12.1, + "learning_rate": 9.661167469344537e-06, + "loss": 2.3386, + "step": 472400 + }, + { + "epoch": 12.1, + "learning_rate": 9.652628343067937e-06, + "loss": 2.1372, + "step": 472500 + }, + { + "epoch": 12.11, + "learning_rate": 9.644089216791338e-06, + "loss": 2.3445, + "step": 472600 + }, + { + "epoch": 12.11, + "learning_rate": 9.635550090514738e-06, + "loss": 2.3872, + "step": 472700 + }, + { + "epoch": 12.11, + "learning_rate": 9.62701096423814e-06, + "loss": 2.3145, + "step": 472800 + }, + { + "epoch": 12.11, + "learning_rate": 9.61847183796154e-06, + "loss": 2.3415, + "step": 472900 + }, + { + "epoch": 12.12, + "learning_rate": 9.609932711684941e-06, + "loss": 2.4201, + "step": 473000 + }, + { + "epoch": 12.12, + "learning_rate": 9.601393585408341e-06, + "loss": 2.4163, + "step": 473100 + }, + { + "epoch": 12.12, + "learning_rate": 9.592854459131743e-06, + "loss": 2.2282, + "step": 473200 + }, + { + "epoch": 12.12, + "learning_rate": 9.584315332855142e-06, + "loss": 2.3337, + "step": 473300 + }, + { + "epoch": 12.13, + "learning_rate": 9.575776206578544e-06, + "loss": 2.5322, + "step": 473400 + }, + { + "epoch": 12.13, + "learning_rate": 9.567237080301944e-06, + "loss": 2.2854, + "step": 473500 + }, + { + "epoch": 12.13, + "learning_rate": 9.558697954025345e-06, + "loss": 2.3463, + "step": 473600 + }, + { + "epoch": 12.13, + "learning_rate": 9.550158827748745e-06, + "loss": 2.1978, + "step": 473700 + }, + { + "epoch": 12.14, + "learning_rate": 9.541619701472145e-06, + "loss": 2.4242, + "step": 473800 + }, + { + "epoch": 12.14, + "learning_rate": 9.533080575195547e-06, + "loss": 2.3271, + "step": 473900 + }, + { + "epoch": 12.14, + "learning_rate": 9.524541448918947e-06, + "loss": 2.3342, + "step": 474000 + }, + { + "epoch": 12.15, + "learning_rate": 9.516002322642348e-06, + "loss": 2.4026, + "step": 474100 + }, + { + "epoch": 12.15, + "learning_rate": 9.507463196365748e-06, + "loss": 2.404, + "step": 474200 + }, + { + "epoch": 12.15, + "learning_rate": 9.49892407008915e-06, + "loss": 2.3344, + "step": 474300 + }, + { + "epoch": 12.15, + "learning_rate": 9.49038494381255e-06, + "loss": 2.4367, + "step": 474400 + }, + { + "epoch": 12.16, + "learning_rate": 9.481845817535951e-06, + "loss": 2.3966, + "step": 474500 + }, + { + "epoch": 12.16, + "learning_rate": 9.47330669125935e-06, + "loss": 2.3476, + "step": 474600 + }, + { + "epoch": 12.16, + "learning_rate": 9.464767564982752e-06, + "loss": 2.2906, + "step": 474700 + }, + { + "epoch": 12.16, + "learning_rate": 9.456228438706152e-06, + "loss": 2.5284, + "step": 474800 + }, + { + "epoch": 12.17, + "learning_rate": 9.447689312429554e-06, + "loss": 2.3994, + "step": 474900 + }, + { + "epoch": 12.17, + "learning_rate": 9.439150186152954e-06, + "loss": 2.2996, + "step": 475000 + }, + { + "epoch": 12.17, + "learning_rate": 9.430611059876353e-06, + "loss": 2.3269, + "step": 475100 + }, + { + "epoch": 12.17, + "learning_rate": 9.422071933599755e-06, + "loss": 2.3843, + "step": 475200 + }, + { + "epoch": 12.18, + "learning_rate": 9.413532807323155e-06, + "loss": 2.4582, + "step": 475300 + }, + { + "epoch": 12.18, + "learning_rate": 9.404993681046556e-06, + "loss": 2.473, + "step": 475400 + }, + { + "epoch": 12.18, + "learning_rate": 9.396454554769956e-06, + "loss": 2.2896, + "step": 475500 + }, + { + "epoch": 12.18, + "learning_rate": 9.387915428493358e-06, + "loss": 2.3914, + "step": 475600 + }, + { + "epoch": 12.19, + "learning_rate": 9.379376302216758e-06, + "loss": 2.3402, + "step": 475700 + }, + { + "epoch": 12.19, + "learning_rate": 9.370837175940159e-06, + "loss": 2.4208, + "step": 475800 + }, + { + "epoch": 12.19, + "learning_rate": 9.362298049663559e-06, + "loss": 2.4028, + "step": 475900 + }, + { + "epoch": 12.19, + "learning_rate": 9.35375892338696e-06, + "loss": 2.3018, + "step": 476000 + }, + { + "epoch": 12.2, + "learning_rate": 9.34521979711036e-06, + "loss": 2.3314, + "step": 476100 + }, + { + "epoch": 12.2, + "learning_rate": 9.336680670833762e-06, + "loss": 2.2372, + "step": 476200 + }, + { + "epoch": 12.2, + "learning_rate": 9.328141544557162e-06, + "loss": 2.267, + "step": 476300 + }, + { + "epoch": 12.2, + "learning_rate": 9.319602418280562e-06, + "loss": 2.2984, + "step": 476400 + }, + { + "epoch": 12.21, + "learning_rate": 9.311063292003963e-06, + "loss": 2.2714, + "step": 476500 + }, + { + "epoch": 12.21, + "learning_rate": 9.302524165727363e-06, + "loss": 2.4087, + "step": 476600 + }, + { + "epoch": 12.21, + "learning_rate": 9.293985039450765e-06, + "loss": 2.3647, + "step": 476700 + }, + { + "epoch": 12.21, + "learning_rate": 9.285445913174164e-06, + "loss": 2.3222, + "step": 476800 + }, + { + "epoch": 12.22, + "learning_rate": 9.276906786897566e-06, + "loss": 2.3455, + "step": 476900 + }, + { + "epoch": 12.22, + "learning_rate": 9.268367660620966e-06, + "loss": 2.2878, + "step": 477000 + }, + { + "epoch": 12.22, + "learning_rate": 9.259828534344367e-06, + "loss": 2.2694, + "step": 477100 + }, + { + "epoch": 12.22, + "learning_rate": 9.251289408067767e-06, + "loss": 2.3443, + "step": 477200 + }, + { + "epoch": 12.23, + "learning_rate": 9.242750281791169e-06, + "loss": 2.2092, + "step": 477300 + }, + { + "epoch": 12.23, + "learning_rate": 9.234211155514569e-06, + "loss": 2.336, + "step": 477400 + }, + { + "epoch": 12.23, + "learning_rate": 9.22567202923797e-06, + "loss": 2.4295, + "step": 477500 + }, + { + "epoch": 12.23, + "learning_rate": 9.21713290296137e-06, + "loss": 2.323, + "step": 477600 + }, + { + "epoch": 12.24, + "learning_rate": 9.20859377668477e-06, + "loss": 2.4561, + "step": 477700 + }, + { + "epoch": 12.24, + "learning_rate": 9.20005465040817e-06, + "loss": 2.2738, + "step": 477800 + }, + { + "epoch": 12.24, + "learning_rate": 9.191515524131571e-06, + "loss": 2.2431, + "step": 477900 + }, + { + "epoch": 12.25, + "learning_rate": 9.182976397854971e-06, + "loss": 2.2159, + "step": 478000 + }, + { + "epoch": 12.25, + "learning_rate": 9.174437271578373e-06, + "loss": 2.4646, + "step": 478100 + }, + { + "epoch": 12.25, + "learning_rate": 9.165898145301773e-06, + "loss": 2.6006, + "step": 478200 + }, + { + "epoch": 12.25, + "learning_rate": 9.157359019025172e-06, + "loss": 2.4189, + "step": 478300 + }, + { + "epoch": 12.26, + "learning_rate": 9.148819892748574e-06, + "loss": 2.4249, + "step": 478400 + }, + { + "epoch": 12.26, + "learning_rate": 9.140280766471974e-06, + "loss": 2.441, + "step": 478500 + }, + { + "epoch": 12.26, + "learning_rate": 9.131741640195375e-06, + "loss": 2.3329, + "step": 478600 + }, + { + "epoch": 12.26, + "learning_rate": 9.123202513918775e-06, + "loss": 2.3692, + "step": 478700 + }, + { + "epoch": 12.27, + "learning_rate": 9.114663387642177e-06, + "loss": 2.2183, + "step": 478800 + }, + { + "epoch": 12.27, + "learning_rate": 9.106124261365577e-06, + "loss": 2.2556, + "step": 478900 + }, + { + "epoch": 12.27, + "learning_rate": 9.097585135088978e-06, + "loss": 2.2597, + "step": 479000 + }, + { + "epoch": 12.27, + "learning_rate": 9.089046008812378e-06, + "loss": 2.2906, + "step": 479100 + }, + { + "epoch": 12.28, + "learning_rate": 9.08050688253578e-06, + "loss": 2.4126, + "step": 479200 + }, + { + "epoch": 12.28, + "learning_rate": 9.07196775625918e-06, + "loss": 2.4707, + "step": 479300 + }, + { + "epoch": 12.28, + "learning_rate": 9.063428629982581e-06, + "loss": 2.2973, + "step": 479400 + }, + { + "epoch": 12.28, + "learning_rate": 9.05488950370598e-06, + "loss": 2.3448, + "step": 479500 + }, + { + "epoch": 12.29, + "learning_rate": 9.04635037742938e-06, + "loss": 2.2431, + "step": 479600 + }, + { + "epoch": 12.29, + "learning_rate": 9.037811251152782e-06, + "loss": 2.1868, + "step": 479700 + }, + { + "epoch": 12.29, + "learning_rate": 9.029272124876182e-06, + "loss": 2.4682, + "step": 479800 + }, + { + "epoch": 12.29, + "learning_rate": 9.020732998599584e-06, + "loss": 2.2371, + "step": 479900 + }, + { + "epoch": 12.3, + "learning_rate": 9.012193872322983e-06, + "loss": 2.3678, + "step": 480000 + }, + { + "epoch": 12.3, + "learning_rate": 9.003654746046385e-06, + "loss": 2.2952, + "step": 480100 + }, + { + "epoch": 12.3, + "learning_rate": 8.995115619769785e-06, + "loss": 2.3105, + "step": 480200 + }, + { + "epoch": 12.3, + "learning_rate": 8.986576493493186e-06, + "loss": 2.2523, + "step": 480300 + }, + { + "epoch": 12.31, + "learning_rate": 8.978037367216586e-06, + "loss": 2.2952, + "step": 480400 + }, + { + "epoch": 12.31, + "learning_rate": 8.969498240939988e-06, + "loss": 2.1216, + "step": 480500 + }, + { + "epoch": 12.31, + "learning_rate": 8.960959114663388e-06, + "loss": 2.2869, + "step": 480600 + }, + { + "epoch": 12.31, + "learning_rate": 8.95241998838679e-06, + "loss": 2.2056, + "step": 480700 + }, + { + "epoch": 12.32, + "learning_rate": 8.943880862110189e-06, + "loss": 2.4429, + "step": 480800 + }, + { + "epoch": 12.32, + "learning_rate": 8.935341735833589e-06, + "loss": 2.4611, + "step": 480900 + }, + { + "epoch": 12.32, + "learning_rate": 8.92680260955699e-06, + "loss": 2.3674, + "step": 481000 + }, + { + "epoch": 12.32, + "learning_rate": 8.91826348328039e-06, + "loss": 2.2883, + "step": 481100 + }, + { + "epoch": 12.33, + "learning_rate": 8.909724357003792e-06, + "loss": 2.3646, + "step": 481200 + }, + { + "epoch": 12.33, + "learning_rate": 8.901185230727192e-06, + "loss": 2.1688, + "step": 481300 + }, + { + "epoch": 12.33, + "learning_rate": 8.892646104450593e-06, + "loss": 2.4568, + "step": 481400 + }, + { + "epoch": 12.33, + "learning_rate": 8.884106978173993e-06, + "loss": 2.3121, + "step": 481500 + }, + { + "epoch": 12.34, + "learning_rate": 8.875567851897395e-06, + "loss": 2.2561, + "step": 481600 + }, + { + "epoch": 12.34, + "learning_rate": 8.867028725620795e-06, + "loss": 2.441, + "step": 481700 + }, + { + "epoch": 12.34, + "learning_rate": 8.858489599344196e-06, + "loss": 2.279, + "step": 481800 + }, + { + "epoch": 12.35, + "learning_rate": 8.849950473067596e-06, + "loss": 2.2011, + "step": 481900 + }, + { + "epoch": 12.35, + "learning_rate": 8.841411346790998e-06, + "loss": 2.4275, + "step": 482000 + }, + { + "epoch": 12.35, + "learning_rate": 8.832872220514397e-06, + "loss": 2.4149, + "step": 482100 + }, + { + "epoch": 12.35, + "learning_rate": 8.824333094237797e-06, + "loss": 2.3443, + "step": 482200 + }, + { + "epoch": 12.36, + "learning_rate": 8.815793967961199e-06, + "loss": 2.4427, + "step": 482300 + }, + { + "epoch": 12.36, + "learning_rate": 8.807254841684599e-06, + "loss": 2.4005, + "step": 482400 + }, + { + "epoch": 12.36, + "learning_rate": 8.798715715408e-06, + "loss": 2.2363, + "step": 482500 + }, + { + "epoch": 12.36, + "learning_rate": 8.7901765891314e-06, + "loss": 2.3844, + "step": 482600 + }, + { + "epoch": 12.37, + "learning_rate": 8.781637462854802e-06, + "loss": 2.2652, + "step": 482700 + }, + { + "epoch": 12.37, + "learning_rate": 8.773098336578201e-06, + "loss": 2.4342, + "step": 482800 + }, + { + "epoch": 12.37, + "learning_rate": 8.764559210301603e-06, + "loss": 2.313, + "step": 482900 + }, + { + "epoch": 12.37, + "learning_rate": 8.756020084025003e-06, + "loss": 2.3698, + "step": 483000 + }, + { + "epoch": 12.38, + "learning_rate": 8.747480957748404e-06, + "loss": 2.2924, + "step": 483100 + }, + { + "epoch": 12.38, + "learning_rate": 8.738941831471804e-06, + "loss": 2.3884, + "step": 483200 + }, + { + "epoch": 12.38, + "learning_rate": 8.730402705195206e-06, + "loss": 2.3257, + "step": 483300 + }, + { + "epoch": 12.38, + "learning_rate": 8.721863578918606e-06, + "loss": 2.315, + "step": 483400 + }, + { + "epoch": 12.39, + "learning_rate": 8.713324452642005e-06, + "loss": 2.4, + "step": 483500 + }, + { + "epoch": 12.39, + "learning_rate": 8.704785326365407e-06, + "loss": 2.4332, + "step": 483600 + }, + { + "epoch": 12.39, + "learning_rate": 8.696246200088807e-06, + "loss": 2.4837, + "step": 483700 + }, + { + "epoch": 12.39, + "learning_rate": 8.687707073812208e-06, + "loss": 2.2764, + "step": 483800 + }, + { + "epoch": 12.4, + "learning_rate": 8.679167947535608e-06, + "loss": 2.3559, + "step": 483900 + }, + { + "epoch": 12.4, + "learning_rate": 8.67062882125901e-06, + "loss": 2.4048, + "step": 484000 + }, + { + "epoch": 12.4, + "learning_rate": 8.66208969498241e-06, + "loss": 2.2568, + "step": 484100 + }, + { + "epoch": 12.4, + "learning_rate": 8.653550568705811e-06, + "loss": 2.1394, + "step": 484200 + }, + { + "epoch": 12.41, + "learning_rate": 8.645011442429211e-06, + "loss": 2.3126, + "step": 484300 + }, + { + "epoch": 12.41, + "learning_rate": 8.636472316152613e-06, + "loss": 2.365, + "step": 484400 + }, + { + "epoch": 12.41, + "learning_rate": 8.627933189876013e-06, + "loss": 2.3888, + "step": 484500 + }, + { + "epoch": 12.41, + "learning_rate": 8.619394063599414e-06, + "loss": 2.309, + "step": 484600 + }, + { + "epoch": 12.42, + "learning_rate": 8.610854937322814e-06, + "loss": 2.3908, + "step": 484700 + }, + { + "epoch": 12.42, + "learning_rate": 8.602315811046214e-06, + "loss": 2.2101, + "step": 484800 + }, + { + "epoch": 12.42, + "learning_rate": 8.593776684769615e-06, + "loss": 2.2009, + "step": 484900 + }, + { + "epoch": 12.42, + "learning_rate": 8.585237558493015e-06, + "loss": 2.4389, + "step": 485000 + }, + { + "epoch": 12.43, + "learning_rate": 8.576698432216417e-06, + "loss": 2.4014, + "step": 485100 + }, + { + "epoch": 12.43, + "learning_rate": 8.568159305939817e-06, + "loss": 2.2933, + "step": 485200 + }, + { + "epoch": 12.43, + "learning_rate": 8.559620179663218e-06, + "loss": 2.2193, + "step": 485300 + }, + { + "epoch": 12.43, + "learning_rate": 8.551081053386618e-06, + "loss": 2.2519, + "step": 485400 + }, + { + "epoch": 12.44, + "learning_rate": 8.54254192711002e-06, + "loss": 2.4025, + "step": 485500 + }, + { + "epoch": 12.44, + "learning_rate": 8.53400280083342e-06, + "loss": 2.4623, + "step": 485600 + }, + { + "epoch": 12.44, + "learning_rate": 8.525463674556821e-06, + "loss": 2.3038, + "step": 485700 + }, + { + "epoch": 12.44, + "learning_rate": 8.51692454828022e-06, + "loss": 2.3208, + "step": 485800 + }, + { + "epoch": 12.45, + "learning_rate": 8.508385422003622e-06, + "loss": 2.3442, + "step": 485900 + }, + { + "epoch": 12.45, + "learning_rate": 8.499846295727022e-06, + "loss": 2.2255, + "step": 486000 + }, + { + "epoch": 12.45, + "learning_rate": 8.491307169450422e-06, + "loss": 2.4155, + "step": 486100 + }, + { + "epoch": 12.46, + "learning_rate": 8.482768043173824e-06, + "loss": 2.3096, + "step": 486200 + }, + { + "epoch": 12.46, + "learning_rate": 8.474228916897223e-06, + "loss": 2.3084, + "step": 486300 + }, + { + "epoch": 12.46, + "learning_rate": 8.465689790620625e-06, + "loss": 2.3511, + "step": 486400 + }, + { + "epoch": 12.46, + "learning_rate": 8.457150664344025e-06, + "loss": 2.2618, + "step": 486500 + }, + { + "epoch": 12.47, + "learning_rate": 8.448611538067425e-06, + "loss": 2.4265, + "step": 486600 + }, + { + "epoch": 12.47, + "learning_rate": 8.440072411790825e-06, + "loss": 2.4689, + "step": 486700 + }, + { + "epoch": 12.47, + "learning_rate": 8.431533285514226e-06, + "loss": 2.4635, + "step": 486800 + }, + { + "epoch": 12.47, + "learning_rate": 8.422994159237626e-06, + "loss": 2.3413, + "step": 486900 + }, + { + "epoch": 12.48, + "learning_rate": 8.414455032961027e-06, + "loss": 2.4025, + "step": 487000 + }, + { + "epoch": 12.48, + "learning_rate": 8.405915906684427e-06, + "loss": 2.3439, + "step": 487100 + }, + { + "epoch": 12.48, + "learning_rate": 8.397376780407829e-06, + "loss": 2.1309, + "step": 487200 + }, + { + "epoch": 12.48, + "learning_rate": 8.388837654131229e-06, + "loss": 2.2746, + "step": 487300 + }, + { + "epoch": 12.49, + "learning_rate": 8.38029852785463e-06, + "loss": 2.2547, + "step": 487400 + }, + { + "epoch": 12.49, + "learning_rate": 8.37175940157803e-06, + "loss": 2.2953, + "step": 487500 + }, + { + "epoch": 12.49, + "learning_rate": 8.363220275301432e-06, + "loss": 2.2432, + "step": 487600 + }, + { + "epoch": 12.49, + "learning_rate": 8.354681149024832e-06, + "loss": 2.3293, + "step": 487700 + }, + { + "epoch": 12.5, + "learning_rate": 8.346142022748233e-06, + "loss": 2.4646, + "step": 487800 + }, + { + "epoch": 12.5, + "learning_rate": 8.337602896471633e-06, + "loss": 2.3454, + "step": 487900 + }, + { + "epoch": 12.5, + "learning_rate": 8.329063770195033e-06, + "loss": 2.3126, + "step": 488000 + }, + { + "epoch": 12.5, + "learning_rate": 8.320524643918434e-06, + "loss": 2.3632, + "step": 488100 + }, + { + "epoch": 12.51, + "learning_rate": 8.311985517641834e-06, + "loss": 2.27, + "step": 488200 + }, + { + "epoch": 12.51, + "learning_rate": 8.303446391365236e-06, + "loss": 2.4339, + "step": 488300 + }, + { + "epoch": 12.51, + "learning_rate": 8.294907265088636e-06, + "loss": 2.3237, + "step": 488400 + }, + { + "epoch": 12.51, + "learning_rate": 8.286368138812037e-06, + "loss": 2.4811, + "step": 488500 + }, + { + "epoch": 12.52, + "learning_rate": 8.277829012535437e-06, + "loss": 2.3788, + "step": 488600 + }, + { + "epoch": 12.52, + "learning_rate": 8.269289886258839e-06, + "loss": 2.3031, + "step": 488700 + }, + { + "epoch": 12.52, + "learning_rate": 8.260750759982238e-06, + "loss": 2.4946, + "step": 488800 + }, + { + "epoch": 12.52, + "learning_rate": 8.25221163370564e-06, + "loss": 2.386, + "step": 488900 + }, + { + "epoch": 12.53, + "learning_rate": 8.24367250742904e-06, + "loss": 2.3604, + "step": 489000 + }, + { + "epoch": 12.53, + "learning_rate": 8.235133381152441e-06, + "loss": 2.3326, + "step": 489100 + }, + { + "epoch": 12.53, + "learning_rate": 8.226594254875841e-06, + "loss": 2.3458, + "step": 489200 + }, + { + "epoch": 12.53, + "learning_rate": 8.218055128599241e-06, + "loss": 2.4537, + "step": 489300 + }, + { + "epoch": 12.54, + "learning_rate": 8.209516002322643e-06, + "loss": 2.445, + "step": 489400 + }, + { + "epoch": 12.54, + "learning_rate": 8.200976876046042e-06, + "loss": 2.3172, + "step": 489500 + }, + { + "epoch": 12.54, + "learning_rate": 8.192437749769444e-06, + "loss": 2.5434, + "step": 489600 + }, + { + "epoch": 12.54, + "learning_rate": 8.183898623492844e-06, + "loss": 2.2746, + "step": 489700 + }, + { + "epoch": 12.55, + "learning_rate": 8.175359497216245e-06, + "loss": 2.5382, + "step": 489800 + }, + { + "epoch": 12.55, + "learning_rate": 8.166820370939645e-06, + "loss": 2.4194, + "step": 489900 + }, + { + "epoch": 12.55, + "learning_rate": 8.158281244663047e-06, + "loss": 2.4621, + "step": 490000 + }, + { + "epoch": 12.56, + "learning_rate": 8.149742118386447e-06, + "loss": 2.1997, + "step": 490100 + }, + { + "epoch": 12.56, + "learning_rate": 8.141202992109848e-06, + "loss": 2.3675, + "step": 490200 + }, + { + "epoch": 12.56, + "learning_rate": 8.132663865833248e-06, + "loss": 2.3056, + "step": 490300 + }, + { + "epoch": 12.56, + "learning_rate": 8.12412473955665e-06, + "loss": 2.359, + "step": 490400 + }, + { + "epoch": 12.57, + "learning_rate": 8.11558561328005e-06, + "loss": 2.2198, + "step": 490500 + }, + { + "epoch": 12.57, + "learning_rate": 8.10704648700345e-06, + "loss": 2.2349, + "step": 490600 + }, + { + "epoch": 12.57, + "learning_rate": 8.098507360726851e-06, + "loss": 2.4303, + "step": 490700 + }, + { + "epoch": 12.57, + "learning_rate": 8.08996823445025e-06, + "loss": 2.4193, + "step": 490800 + }, + { + "epoch": 12.58, + "learning_rate": 8.081429108173652e-06, + "loss": 2.2192, + "step": 490900 + }, + { + "epoch": 12.58, + "learning_rate": 8.072889981897052e-06, + "loss": 2.3109, + "step": 491000 + }, + { + "epoch": 12.58, + "learning_rate": 8.064350855620454e-06, + "loss": 2.3217, + "step": 491100 + }, + { + "epoch": 12.58, + "learning_rate": 8.055811729343854e-06, + "loss": 2.2739, + "step": 491200 + }, + { + "epoch": 12.59, + "learning_rate": 8.047272603067255e-06, + "loss": 2.4991, + "step": 491300 + }, + { + "epoch": 12.59, + "learning_rate": 8.038733476790655e-06, + "loss": 2.234, + "step": 491400 + }, + { + "epoch": 12.59, + "learning_rate": 8.030194350514057e-06, + "loss": 2.27, + "step": 491500 + }, + { + "epoch": 12.59, + "learning_rate": 8.021655224237456e-06, + "loss": 2.4502, + "step": 491600 + }, + { + "epoch": 12.6, + "learning_rate": 8.013116097960858e-06, + "loss": 2.3828, + "step": 491700 + }, + { + "epoch": 12.6, + "learning_rate": 8.004576971684258e-06, + "loss": 2.3753, + "step": 491800 + }, + { + "epoch": 12.6, + "learning_rate": 7.996037845407658e-06, + "loss": 2.5328, + "step": 491900 + }, + { + "epoch": 12.6, + "learning_rate": 7.98749871913106e-06, + "loss": 2.0865, + "step": 492000 + }, + { + "epoch": 12.61, + "learning_rate": 7.978959592854459e-06, + "loss": 2.3216, + "step": 492100 + }, + { + "epoch": 12.61, + "learning_rate": 7.97042046657786e-06, + "loss": 2.1444, + "step": 492200 + }, + { + "epoch": 12.61, + "learning_rate": 7.96188134030126e-06, + "loss": 2.1694, + "step": 492300 + }, + { + "epoch": 12.61, + "learning_rate": 7.953342214024662e-06, + "loss": 2.258, + "step": 492400 + }, + { + "epoch": 12.62, + "learning_rate": 7.944803087748062e-06, + "loss": 2.4035, + "step": 492500 + }, + { + "epoch": 12.62, + "learning_rate": 7.936263961471463e-06, + "loss": 2.3153, + "step": 492600 + }, + { + "epoch": 12.62, + "learning_rate": 7.927724835194863e-06, + "loss": 2.4465, + "step": 492700 + }, + { + "epoch": 12.62, + "learning_rate": 7.919185708918265e-06, + "loss": 2.341, + "step": 492800 + }, + { + "epoch": 12.63, + "learning_rate": 7.910646582641665e-06, + "loss": 2.3799, + "step": 492900 + }, + { + "epoch": 12.63, + "learning_rate": 7.902107456365066e-06, + "loss": 2.2845, + "step": 493000 + }, + { + "epoch": 12.63, + "learning_rate": 7.893568330088466e-06, + "loss": 2.0654, + "step": 493100 + }, + { + "epoch": 12.63, + "learning_rate": 7.885029203811866e-06, + "loss": 2.4565, + "step": 493200 + }, + { + "epoch": 12.64, + "learning_rate": 7.876490077535267e-06, + "loss": 2.3472, + "step": 493300 + }, + { + "epoch": 12.64, + "learning_rate": 7.867950951258667e-06, + "loss": 2.3162, + "step": 493400 + }, + { + "epoch": 12.64, + "learning_rate": 7.859411824982069e-06, + "loss": 2.3647, + "step": 493500 + }, + { + "epoch": 12.64, + "learning_rate": 7.850872698705469e-06, + "loss": 2.4958, + "step": 493600 + }, + { + "epoch": 12.65, + "learning_rate": 7.84233357242887e-06, + "loss": 2.2391, + "step": 493700 + }, + { + "epoch": 12.65, + "learning_rate": 7.83379444615227e-06, + "loss": 2.2725, + "step": 493800 + }, + { + "epoch": 12.65, + "learning_rate": 7.825255319875672e-06, + "loss": 2.3836, + "step": 493900 + }, + { + "epoch": 12.65, + "learning_rate": 7.816716193599071e-06, + "loss": 2.3073, + "step": 494000 + }, + { + "epoch": 12.66, + "learning_rate": 7.808177067322473e-06, + "loss": 2.152, + "step": 494100 + }, + { + "epoch": 12.66, + "learning_rate": 7.799637941045873e-06, + "loss": 2.2982, + "step": 494200 + }, + { + "epoch": 12.66, + "learning_rate": 7.791098814769274e-06, + "loss": 2.4182, + "step": 494300 + }, + { + "epoch": 12.67, + "learning_rate": 7.782559688492674e-06, + "loss": 2.1968, + "step": 494400 + }, + { + "epoch": 12.67, + "learning_rate": 7.774020562216074e-06, + "loss": 2.4608, + "step": 494500 + }, + { + "epoch": 12.67, + "learning_rate": 7.765481435939476e-06, + "loss": 2.3586, + "step": 494600 + }, + { + "epoch": 12.67, + "learning_rate": 7.756942309662876e-06, + "loss": 2.397, + "step": 494700 + }, + { + "epoch": 12.68, + "learning_rate": 7.748403183386277e-06, + "loss": 2.3283, + "step": 494800 + }, + { + "epoch": 12.68, + "learning_rate": 7.739864057109677e-06, + "loss": 2.4132, + "step": 494900 + }, + { + "epoch": 12.68, + "learning_rate": 7.731324930833079e-06, + "loss": 2.4419, + "step": 495000 + }, + { + "epoch": 12.68, + "learning_rate": 7.722785804556478e-06, + "loss": 2.1422, + "step": 495100 + }, + { + "epoch": 12.69, + "learning_rate": 7.71424667827988e-06, + "loss": 2.289, + "step": 495200 + }, + { + "epoch": 12.69, + "learning_rate": 7.705707552003278e-06, + "loss": 2.4988, + "step": 495300 + }, + { + "epoch": 12.69, + "learning_rate": 7.69716842572668e-06, + "loss": 2.2937, + "step": 495400 + }, + { + "epoch": 12.69, + "learning_rate": 7.68862929945008e-06, + "loss": 2.4985, + "step": 495500 + }, + { + "epoch": 12.7, + "learning_rate": 7.680090173173481e-06, + "loss": 2.3567, + "step": 495600 + }, + { + "epoch": 12.7, + "learning_rate": 7.671551046896881e-06, + "loss": 2.1894, + "step": 495700 + }, + { + "epoch": 12.7, + "learning_rate": 7.663011920620282e-06, + "loss": 2.232, + "step": 495800 + }, + { + "epoch": 12.7, + "learning_rate": 7.654472794343682e-06, + "loss": 2.4518, + "step": 495900 + }, + { + "epoch": 12.71, + "learning_rate": 7.645933668067084e-06, + "loss": 2.3342, + "step": 496000 + }, + { + "epoch": 12.71, + "learning_rate": 7.637394541790484e-06, + "loss": 2.3339, + "step": 496100 + }, + { + "epoch": 12.71, + "learning_rate": 7.628855415513884e-06, + "loss": 2.4171, + "step": 496200 + }, + { + "epoch": 12.71, + "learning_rate": 7.620316289237285e-06, + "loss": 2.3569, + "step": 496300 + }, + { + "epoch": 12.72, + "learning_rate": 7.611777162960686e-06, + "loss": 2.2601, + "step": 496400 + }, + { + "epoch": 12.72, + "learning_rate": 7.6032380366840865e-06, + "loss": 2.3, + "step": 496500 + }, + { + "epoch": 12.72, + "learning_rate": 7.594698910407487e-06, + "loss": 2.1885, + "step": 496600 + }, + { + "epoch": 12.72, + "learning_rate": 7.586159784130888e-06, + "loss": 2.4208, + "step": 496700 + }, + { + "epoch": 12.73, + "learning_rate": 7.577620657854289e-06, + "loss": 2.2028, + "step": 496800 + }, + { + "epoch": 12.73, + "learning_rate": 7.569081531577689e-06, + "loss": 2.1802, + "step": 496900 + }, + { + "epoch": 12.73, + "learning_rate": 7.560542405301089e-06, + "loss": 2.4717, + "step": 497000 + }, + { + "epoch": 12.73, + "learning_rate": 7.55200327902449e-06, + "loss": 2.5148, + "step": 497100 + }, + { + "epoch": 12.74, + "learning_rate": 7.5434641527478905e-06, + "loss": 2.3434, + "step": 497200 + }, + { + "epoch": 12.74, + "learning_rate": 7.534925026471291e-06, + "loss": 2.2749, + "step": 497300 + }, + { + "epoch": 12.74, + "learning_rate": 7.526385900194692e-06, + "loss": 2.4476, + "step": 497400 + }, + { + "epoch": 12.74, + "learning_rate": 7.517846773918093e-06, + "loss": 2.4586, + "step": 497500 + }, + { + "epoch": 12.75, + "learning_rate": 7.509307647641493e-06, + "loss": 2.3242, + "step": 497600 + }, + { + "epoch": 12.75, + "learning_rate": 7.500768521364894e-06, + "loss": 2.2898, + "step": 497700 + }, + { + "epoch": 12.75, + "learning_rate": 7.492229395088295e-06, + "loss": 2.1807, + "step": 497800 + }, + { + "epoch": 12.75, + "learning_rate": 7.4836902688116955e-06, + "loss": 2.3854, + "step": 497900 + }, + { + "epoch": 12.76, + "learning_rate": 7.475151142535096e-06, + "loss": 2.3335, + "step": 498000 + }, + { + "epoch": 12.76, + "learning_rate": 7.466612016258497e-06, + "loss": 2.4008, + "step": 498100 + }, + { + "epoch": 12.76, + "learning_rate": 7.4580728899818976e-06, + "loss": 2.3006, + "step": 498200 + }, + { + "epoch": 12.77, + "learning_rate": 7.449533763705297e-06, + "loss": 2.4518, + "step": 498300 + }, + { + "epoch": 12.77, + "learning_rate": 7.440994637428698e-06, + "loss": 2.2865, + "step": 498400 + }, + { + "epoch": 12.77, + "learning_rate": 7.432455511152099e-06, + "loss": 2.4045, + "step": 498500 + }, + { + "epoch": 12.77, + "learning_rate": 7.4239163848754995e-06, + "loss": 2.2694, + "step": 498600 + }, + { + "epoch": 12.78, + "learning_rate": 7.4153772585989e-06, + "loss": 2.3668, + "step": 498700 + }, + { + "epoch": 12.78, + "learning_rate": 7.406838132322301e-06, + "loss": 2.3019, + "step": 498800 + }, + { + "epoch": 12.78, + "learning_rate": 7.398299006045702e-06, + "loss": 2.3908, + "step": 498900 + }, + { + "epoch": 12.78, + "learning_rate": 7.389759879769102e-06, + "loss": 2.3854, + "step": 499000 + }, + { + "epoch": 12.79, + "learning_rate": 7.381220753492503e-06, + "loss": 2.4618, + "step": 499100 + }, + { + "epoch": 12.79, + "learning_rate": 7.372681627215904e-06, + "loss": 2.4981, + "step": 499200 + }, + { + "epoch": 12.79, + "learning_rate": 7.364142500939304e-06, + "loss": 2.2234, + "step": 499300 + }, + { + "epoch": 12.79, + "learning_rate": 7.355603374662705e-06, + "loss": 2.2864, + "step": 499400 + }, + { + "epoch": 12.8, + "learning_rate": 7.347064248386106e-06, + "loss": 2.3078, + "step": 499500 + }, + { + "epoch": 12.8, + "learning_rate": 7.338525122109506e-06, + "loss": 2.311, + "step": 499600 + }, + { + "epoch": 12.8, + "learning_rate": 7.329985995832906e-06, + "loss": 2.3657, + "step": 499700 + }, + { + "epoch": 12.8, + "learning_rate": 7.321446869556307e-06, + "loss": 2.4746, + "step": 499800 + }, + { + "epoch": 12.81, + "learning_rate": 7.312907743279708e-06, + "loss": 2.2827, + "step": 499900 + }, + { + "epoch": 12.81, + "learning_rate": 7.3043686170031085e-06, + "loss": 2.4597, + "step": 500000 + }, + { + "epoch": 12.81, + "learning_rate": 7.295829490726509e-06, + "loss": 2.1647, + "step": 500100 + }, + { + "epoch": 12.81, + "learning_rate": 7.28729036444991e-06, + "loss": 2.3248, + "step": 500200 + }, + { + "epoch": 12.82, + "learning_rate": 7.278751238173311e-06, + "loss": 2.2732, + "step": 500300 + }, + { + "epoch": 12.82, + "learning_rate": 7.270212111896711e-06, + "loss": 2.4343, + "step": 500400 + }, + { + "epoch": 12.82, + "learning_rate": 7.261672985620112e-06, + "loss": 2.388, + "step": 500500 + }, + { + "epoch": 12.82, + "learning_rate": 7.253133859343513e-06, + "loss": 2.2686, + "step": 500600 + }, + { + "epoch": 12.83, + "learning_rate": 7.244594733066913e-06, + "loss": 2.2298, + "step": 500700 + }, + { + "epoch": 12.83, + "learning_rate": 7.236055606790314e-06, + "loss": 2.3278, + "step": 500800 + }, + { + "epoch": 12.83, + "learning_rate": 7.227516480513715e-06, + "loss": 2.3579, + "step": 500900 + }, + { + "epoch": 12.83, + "learning_rate": 7.218977354237115e-06, + "loss": 2.3337, + "step": 501000 + }, + { + "epoch": 12.84, + "learning_rate": 7.210438227960515e-06, + "loss": 2.3084, + "step": 501100 + }, + { + "epoch": 12.84, + "learning_rate": 7.201899101683916e-06, + "loss": 2.3216, + "step": 501200 + }, + { + "epoch": 12.84, + "learning_rate": 7.193359975407317e-06, + "loss": 2.381, + "step": 501300 + }, + { + "epoch": 12.84, + "learning_rate": 7.1848208491307175e-06, + "loss": 2.2307, + "step": 501400 + }, + { + "epoch": 12.85, + "learning_rate": 7.176281722854118e-06, + "loss": 2.3823, + "step": 501500 + }, + { + "epoch": 12.85, + "learning_rate": 7.167742596577519e-06, + "loss": 2.4136, + "step": 501600 + }, + { + "epoch": 12.85, + "learning_rate": 7.1592034703009196e-06, + "loss": 2.4216, + "step": 501700 + }, + { + "epoch": 12.85, + "learning_rate": 7.15066434402432e-06, + "loss": 2.3522, + "step": 501800 + }, + { + "epoch": 12.86, + "learning_rate": 7.142125217747721e-06, + "loss": 2.1623, + "step": 501900 + }, + { + "epoch": 12.86, + "learning_rate": 7.133586091471122e-06, + "loss": 2.313, + "step": 502000 + }, + { + "epoch": 12.86, + "learning_rate": 7.125046965194522e-06, + "loss": 2.2511, + "step": 502100 + }, + { + "epoch": 12.87, + "learning_rate": 7.116507838917923e-06, + "loss": 2.284, + "step": 502200 + }, + { + "epoch": 12.87, + "learning_rate": 7.107968712641323e-06, + "loss": 2.3836, + "step": 502300 + }, + { + "epoch": 12.87, + "learning_rate": 7.099429586364724e-06, + "loss": 2.1526, + "step": 502400 + }, + { + "epoch": 12.87, + "learning_rate": 7.090890460088124e-06, + "loss": 2.3357, + "step": 502500 + }, + { + "epoch": 12.88, + "learning_rate": 7.082351333811525e-06, + "loss": 2.2915, + "step": 502600 + }, + { + "epoch": 12.88, + "learning_rate": 7.073812207534926e-06, + "loss": 2.4807, + "step": 502700 + }, + { + "epoch": 12.88, + "learning_rate": 7.065273081258326e-06, + "loss": 2.4016, + "step": 502800 + }, + { + "epoch": 12.88, + "learning_rate": 7.056733954981727e-06, + "loss": 2.3555, + "step": 502900 + }, + { + "epoch": 12.89, + "learning_rate": 7.048194828705128e-06, + "loss": 2.3149, + "step": 503000 + }, + { + "epoch": 12.89, + "learning_rate": 7.0396557024285285e-06, + "loss": 2.3492, + "step": 503100 + }, + { + "epoch": 12.89, + "learning_rate": 7.031116576151929e-06, + "loss": 2.3255, + "step": 503200 + }, + { + "epoch": 12.89, + "learning_rate": 7.02257744987533e-06, + "loss": 2.3781, + "step": 503300 + }, + { + "epoch": 12.9, + "learning_rate": 7.014038323598731e-06, + "loss": 2.3089, + "step": 503400 + }, + { + "epoch": 12.9, + "learning_rate": 7.005499197322131e-06, + "loss": 2.3618, + "step": 503500 + }, + { + "epoch": 12.9, + "learning_rate": 6.996960071045531e-06, + "loss": 2.2661, + "step": 503600 + }, + { + "epoch": 12.9, + "learning_rate": 6.988420944768932e-06, + "loss": 2.4096, + "step": 503700 + }, + { + "epoch": 12.91, + "learning_rate": 6.979881818492333e-06, + "loss": 2.2042, + "step": 503800 + }, + { + "epoch": 12.91, + "learning_rate": 6.971342692215733e-06, + "loss": 2.3418, + "step": 503900 + }, + { + "epoch": 12.91, + "learning_rate": 6.962803565939134e-06, + "loss": 2.2529, + "step": 504000 + }, + { + "epoch": 12.91, + "learning_rate": 6.954264439662533e-06, + "loss": 2.3809, + "step": 504100 + }, + { + "epoch": 12.92, + "learning_rate": 6.945725313385934e-06, + "loss": 2.3171, + "step": 504200 + }, + { + "epoch": 12.92, + "learning_rate": 6.937186187109334e-06, + "loss": 2.3075, + "step": 504300 + }, + { + "epoch": 12.92, + "learning_rate": 6.928647060832735e-06, + "loss": 2.5179, + "step": 504400 + }, + { + "epoch": 12.92, + "learning_rate": 6.920107934556136e-06, + "loss": 2.3905, + "step": 504500 + }, + { + "epoch": 12.93, + "learning_rate": 6.9115688082795365e-06, + "loss": 2.2362, + "step": 504600 + }, + { + "epoch": 12.93, + "learning_rate": 6.903029682002937e-06, + "loss": 2.4593, + "step": 504700 + }, + { + "epoch": 12.93, + "learning_rate": 6.894490555726338e-06, + "loss": 2.3502, + "step": 504800 + }, + { + "epoch": 12.93, + "learning_rate": 6.885951429449739e-06, + "loss": 2.4884, + "step": 504900 + }, + { + "epoch": 12.94, + "learning_rate": 6.877412303173139e-06, + "loss": 2.3433, + "step": 505000 + }, + { + "epoch": 12.94, + "learning_rate": 6.86887317689654e-06, + "loss": 2.3418, + "step": 505100 + }, + { + "epoch": 12.94, + "learning_rate": 6.860334050619941e-06, + "loss": 2.2611, + "step": 505200 + }, + { + "epoch": 12.94, + "learning_rate": 6.851794924343341e-06, + "loss": 2.4576, + "step": 505300 + }, + { + "epoch": 12.95, + "learning_rate": 6.843255798066741e-06, + "loss": 2.354, + "step": 505400 + }, + { + "epoch": 12.95, + "learning_rate": 6.834716671790142e-06, + "loss": 2.2216, + "step": 505500 + }, + { + "epoch": 12.95, + "learning_rate": 6.826177545513543e-06, + "loss": 2.3089, + "step": 505600 + }, + { + "epoch": 12.95, + "learning_rate": 6.817638419236943e-06, + "loss": 2.405, + "step": 505700 + }, + { + "epoch": 12.96, + "learning_rate": 6.809099292960344e-06, + "loss": 2.2704, + "step": 505800 + }, + { + "epoch": 12.96, + "learning_rate": 6.800560166683745e-06, + "loss": 2.3242, + "step": 505900 + }, + { + "epoch": 12.96, + "learning_rate": 6.7920210404071455e-06, + "loss": 2.2489, + "step": 506000 + }, + { + "epoch": 12.96, + "learning_rate": 6.783481914130546e-06, + "loss": 2.2966, + "step": 506100 + }, + { + "epoch": 12.97, + "learning_rate": 6.774942787853947e-06, + "loss": 2.3083, + "step": 506200 + }, + { + "epoch": 12.97, + "learning_rate": 6.7664036615773476e-06, + "loss": 2.4113, + "step": 506300 + }, + { + "epoch": 12.97, + "learning_rate": 6.757864535300748e-06, + "loss": 2.3892, + "step": 506400 + }, + { + "epoch": 12.98, + "learning_rate": 6.749325409024149e-06, + "loss": 2.2771, + "step": 506500 + }, + { + "epoch": 12.98, + "learning_rate": 6.74078628274755e-06, + "loss": 2.3133, + "step": 506600 + }, + { + "epoch": 12.98, + "learning_rate": 6.7322471564709495e-06, + "loss": 2.3161, + "step": 506700 + }, + { + "epoch": 12.98, + "learning_rate": 6.72370803019435e-06, + "loss": 2.2451, + "step": 506800 + }, + { + "epoch": 12.99, + "learning_rate": 6.715168903917751e-06, + "loss": 2.3036, + "step": 506900 + }, + { + "epoch": 12.99, + "learning_rate": 6.706629777641152e-06, + "loss": 2.3547, + "step": 507000 + }, + { + "epoch": 12.99, + "learning_rate": 6.698090651364552e-06, + "loss": 2.3271, + "step": 507100 + }, + { + "epoch": 12.99, + "learning_rate": 6.689551525087953e-06, + "loss": 2.2541, + "step": 507200 + }, + { + "epoch": 13.0, + "learning_rate": 6.681012398811354e-06, + "loss": 2.2262, + "step": 507300 + }, + { + "epoch": 13.0, + "learning_rate": 6.6724732725347544e-06, + "loss": 2.2099, + "step": 507400 + }, + { + "epoch": 13.0, + "learning_rate": 6.663934146258155e-06, + "loss": 2.285, + "step": 507500 + }, + { + "epoch": 13.0, + "learning_rate": 6.655395019981556e-06, + "loss": 2.2368, + "step": 507600 + }, + { + "epoch": 13.01, + "learning_rate": 6.6468558937049565e-06, + "loss": 2.2398, + "step": 507700 + }, + { + "epoch": 13.01, + "learning_rate": 6.638316767428357e-06, + "loss": 2.3463, + "step": 507800 + }, + { + "epoch": 13.01, + "learning_rate": 6.629777641151758e-06, + "loss": 2.2428, + "step": 507900 + }, + { + "epoch": 13.01, + "learning_rate": 6.621238514875159e-06, + "loss": 2.5164, + "step": 508000 + }, + { + "epoch": 13.02, + "learning_rate": 6.6126993885985585e-06, + "loss": 2.2446, + "step": 508100 + }, + { + "epoch": 13.02, + "learning_rate": 6.604160262321959e-06, + "loss": 2.1762, + "step": 508200 + }, + { + "epoch": 13.02, + "learning_rate": 6.59562113604536e-06, + "loss": 2.3407, + "step": 508300 + }, + { + "epoch": 13.02, + "learning_rate": 6.587082009768761e-06, + "loss": 2.3899, + "step": 508400 + }, + { + "epoch": 13.03, + "learning_rate": 6.578542883492161e-06, + "loss": 2.2486, + "step": 508500 + }, + { + "epoch": 13.03, + "learning_rate": 6.570003757215562e-06, + "loss": 2.2667, + "step": 508600 + }, + { + "epoch": 13.03, + "learning_rate": 6.561464630938963e-06, + "loss": 2.3153, + "step": 508700 + }, + { + "epoch": 13.03, + "learning_rate": 6.552925504662363e-06, + "loss": 2.2504, + "step": 508800 + }, + { + "epoch": 13.04, + "learning_rate": 6.544386378385764e-06, + "loss": 2.3833, + "step": 508900 + }, + { + "epoch": 13.04, + "learning_rate": 6.535847252109165e-06, + "loss": 2.1683, + "step": 509000 + }, + { + "epoch": 13.04, + "learning_rate": 6.5273081258325655e-06, + "loss": 2.2484, + "step": 509100 + }, + { + "epoch": 13.04, + "learning_rate": 6.518768999555966e-06, + "loss": 2.4141, + "step": 509200 + }, + { + "epoch": 13.05, + "learning_rate": 6.510229873279367e-06, + "loss": 2.0949, + "step": 509300 + }, + { + "epoch": 13.05, + "learning_rate": 6.501690747002767e-06, + "loss": 2.3086, + "step": 509400 + }, + { + "epoch": 13.05, + "learning_rate": 6.4931516207261675e-06, + "loss": 2.0418, + "step": 509500 + }, + { + "epoch": 13.05, + "learning_rate": 6.484612494449568e-06, + "loss": 2.2146, + "step": 509600 + }, + { + "epoch": 13.06, + "learning_rate": 6.476073368172969e-06, + "loss": 2.2324, + "step": 509700 + }, + { + "epoch": 13.06, + "learning_rate": 6.4675342418963696e-06, + "loss": 2.4706, + "step": 509800 + }, + { + "epoch": 13.06, + "learning_rate": 6.45899511561977e-06, + "loss": 2.114, + "step": 509900 + }, + { + "epoch": 13.06, + "learning_rate": 6.450455989343171e-06, + "loss": 2.4077, + "step": 510000 + }, + { + "epoch": 13.07, + "learning_rate": 6.441916863066572e-06, + "loss": 2.4391, + "step": 510100 + }, + { + "epoch": 13.07, + "learning_rate": 6.433377736789972e-06, + "loss": 2.2424, + "step": 510200 + }, + { + "epoch": 13.07, + "learning_rate": 6.424838610513373e-06, + "loss": 2.2792, + "step": 510300 + }, + { + "epoch": 13.08, + "learning_rate": 6.416299484236774e-06, + "loss": 2.3093, + "step": 510400 + }, + { + "epoch": 13.08, + "learning_rate": 6.4077603579601745e-06, + "loss": 2.4905, + "step": 510500 + }, + { + "epoch": 13.08, + "learning_rate": 6.399221231683575e-06, + "loss": 2.4747, + "step": 510600 + }, + { + "epoch": 13.08, + "learning_rate": 6.390682105406975e-06, + "loss": 2.35, + "step": 510700 + }, + { + "epoch": 13.09, + "learning_rate": 6.382142979130376e-06, + "loss": 2.2641, + "step": 510800 + }, + { + "epoch": 13.09, + "learning_rate": 6.3736038528537764e-06, + "loss": 2.1705, + "step": 510900 + }, + { + "epoch": 13.09, + "learning_rate": 6.365064726577177e-06, + "loss": 2.203, + "step": 511000 + }, + { + "epoch": 13.09, + "learning_rate": 6.356525600300578e-06, + "loss": 2.3118, + "step": 511100 + }, + { + "epoch": 13.1, + "learning_rate": 6.3479864740239785e-06, + "loss": 2.3299, + "step": 511200 + }, + { + "epoch": 13.1, + "learning_rate": 6.339447347747379e-06, + "loss": 2.3658, + "step": 511300 + }, + { + "epoch": 13.1, + "learning_rate": 6.33090822147078e-06, + "loss": 2.4442, + "step": 511400 + }, + { + "epoch": 13.1, + "learning_rate": 6.322369095194181e-06, + "loss": 2.4042, + "step": 511500 + }, + { + "epoch": 13.11, + "learning_rate": 6.313829968917581e-06, + "loss": 2.4569, + "step": 511600 + }, + { + "epoch": 13.11, + "learning_rate": 6.305290842640982e-06, + "loss": 2.3307, + "step": 511700 + }, + { + "epoch": 13.11, + "learning_rate": 6.296751716364383e-06, + "loss": 2.1856, + "step": 511800 + }, + { + "epoch": 13.11, + "learning_rate": 6.2882125900877835e-06, + "loss": 2.218, + "step": 511900 + }, + { + "epoch": 13.12, + "learning_rate": 6.279673463811183e-06, + "loss": 2.173, + "step": 512000 + }, + { + "epoch": 13.12, + "learning_rate": 6.271134337534584e-06, + "loss": 2.3561, + "step": 512100 + }, + { + "epoch": 13.12, + "learning_rate": 6.262595211257985e-06, + "loss": 2.3229, + "step": 512200 + }, + { + "epoch": 13.12, + "learning_rate": 6.254056084981385e-06, + "loss": 2.2665, + "step": 512300 + }, + { + "epoch": 13.13, + "learning_rate": 6.245516958704785e-06, + "loss": 2.2048, + "step": 512400 + }, + { + "epoch": 13.13, + "learning_rate": 6.236977832428186e-06, + "loss": 2.1869, + "step": 512500 + }, + { + "epoch": 13.13, + "learning_rate": 6.228438706151587e-06, + "loss": 2.2518, + "step": 512600 + }, + { + "epoch": 13.13, + "learning_rate": 6.219899579874987e-06, + "loss": 2.3848, + "step": 512700 + }, + { + "epoch": 13.14, + "learning_rate": 6.211360453598388e-06, + "loss": 2.2685, + "step": 512800 + }, + { + "epoch": 13.14, + "learning_rate": 6.202821327321789e-06, + "loss": 2.2747, + "step": 512900 + }, + { + "epoch": 13.14, + "learning_rate": 6.1942822010451895e-06, + "loss": 2.2388, + "step": 513000 + }, + { + "epoch": 13.14, + "learning_rate": 6.18574307476859e-06, + "loss": 2.2014, + "step": 513100 + }, + { + "epoch": 13.15, + "learning_rate": 6.177203948491991e-06, + "loss": 2.2756, + "step": 513200 + }, + { + "epoch": 13.15, + "learning_rate": 6.1686648222153916e-06, + "loss": 2.4186, + "step": 513300 + }, + { + "epoch": 13.15, + "learning_rate": 6.160125695938792e-06, + "loss": 2.4534, + "step": 513400 + }, + { + "epoch": 13.15, + "learning_rate": 6.151586569662193e-06, + "loss": 2.3178, + "step": 513500 + }, + { + "epoch": 13.16, + "learning_rate": 6.143047443385593e-06, + "loss": 2.1683, + "step": 513600 + }, + { + "epoch": 13.16, + "learning_rate": 6.1345083171089935e-06, + "loss": 2.4042, + "step": 513700 + }, + { + "epoch": 13.16, + "learning_rate": 6.125969190832394e-06, + "loss": 2.3706, + "step": 513800 + }, + { + "epoch": 13.16, + "learning_rate": 6.117430064555795e-06, + "loss": 2.3816, + "step": 513900 + }, + { + "epoch": 13.17, + "learning_rate": 6.108890938279196e-06, + "loss": 2.3901, + "step": 514000 + }, + { + "epoch": 13.17, + "learning_rate": 6.100351812002596e-06, + "loss": 2.4142, + "step": 514100 + }, + { + "epoch": 13.17, + "learning_rate": 6.091812685725997e-06, + "loss": 2.3527, + "step": 514200 + }, + { + "epoch": 13.18, + "learning_rate": 6.083273559449398e-06, + "loss": 2.2065, + "step": 514300 + }, + { + "epoch": 13.18, + "learning_rate": 6.0747344331727984e-06, + "loss": 2.2061, + "step": 514400 + }, + { + "epoch": 13.18, + "learning_rate": 6.066195306896199e-06, + "loss": 2.392, + "step": 514500 + }, + { + "epoch": 13.18, + "learning_rate": 6.0576561806196e-06, + "loss": 2.3672, + "step": 514600 + }, + { + "epoch": 13.19, + "learning_rate": 6.0491170543430005e-06, + "loss": 2.4435, + "step": 514700 + }, + { + "epoch": 13.19, + "learning_rate": 6.040577928066401e-06, + "loss": 2.3277, + "step": 514800 + }, + { + "epoch": 13.19, + "learning_rate": 6.032038801789801e-06, + "loss": 2.2059, + "step": 514900 + }, + { + "epoch": 13.19, + "learning_rate": 6.023499675513202e-06, + "loss": 2.3794, + "step": 515000 + }, + { + "epoch": 13.2, + "learning_rate": 6.014960549236602e-06, + "loss": 2.3631, + "step": 515100 + }, + { + "epoch": 13.2, + "learning_rate": 6.006421422960002e-06, + "loss": 2.2747, + "step": 515200 + }, + { + "epoch": 13.2, + "learning_rate": 5.997882296683403e-06, + "loss": 2.2276, + "step": 515300 + }, + { + "epoch": 13.2, + "learning_rate": 5.989343170406804e-06, + "loss": 2.3502, + "step": 515400 + }, + { + "epoch": 13.21, + "learning_rate": 5.9808040441302045e-06, + "loss": 2.2641, + "step": 515500 + }, + { + "epoch": 13.21, + "learning_rate": 5.972264917853605e-06, + "loss": 2.4032, + "step": 515600 + }, + { + "epoch": 13.21, + "learning_rate": 5.963725791577006e-06, + "loss": 2.2596, + "step": 515700 + }, + { + "epoch": 13.21, + "learning_rate": 5.9551866653004066e-06, + "loss": 2.2879, + "step": 515800 + }, + { + "epoch": 13.22, + "learning_rate": 5.946647539023807e-06, + "loss": 2.2841, + "step": 515900 + }, + { + "epoch": 13.22, + "learning_rate": 5.938108412747208e-06, + "loss": 2.23, + "step": 516000 + }, + { + "epoch": 13.22, + "learning_rate": 5.929569286470609e-06, + "loss": 2.4033, + "step": 516100 + }, + { + "epoch": 13.22, + "learning_rate": 5.921030160194009e-06, + "loss": 2.1437, + "step": 516200 + }, + { + "epoch": 13.23, + "learning_rate": 5.91249103391741e-06, + "loss": 2.2891, + "step": 516300 + }, + { + "epoch": 13.23, + "learning_rate": 5.903951907640811e-06, + "loss": 2.1633, + "step": 516400 + }, + { + "epoch": 13.23, + "learning_rate": 5.895412781364211e-06, + "loss": 2.2754, + "step": 516500 + }, + { + "epoch": 13.23, + "learning_rate": 5.886873655087611e-06, + "loss": 2.3523, + "step": 516600 + }, + { + "epoch": 13.24, + "learning_rate": 5.878334528811012e-06, + "loss": 2.2518, + "step": 516700 + }, + { + "epoch": 13.24, + "learning_rate": 5.869795402534413e-06, + "loss": 2.3984, + "step": 516800 + }, + { + "epoch": 13.24, + "learning_rate": 5.8612562762578134e-06, + "loss": 2.3942, + "step": 516900 + }, + { + "epoch": 13.24, + "learning_rate": 5.852717149981214e-06, + "loss": 2.3366, + "step": 517000 + }, + { + "epoch": 13.25, + "learning_rate": 5.844178023704615e-06, + "loss": 2.2217, + "step": 517100 + }, + { + "epoch": 13.25, + "learning_rate": 5.8356388974280155e-06, + "loss": 2.3396, + "step": 517200 + }, + { + "epoch": 13.25, + "learning_rate": 5.827099771151416e-06, + "loss": 2.2669, + "step": 517300 + }, + { + "epoch": 13.25, + "learning_rate": 5.818560644874817e-06, + "loss": 2.3906, + "step": 517400 + }, + { + "epoch": 13.26, + "learning_rate": 5.810021518598218e-06, + "loss": 2.3159, + "step": 517500 + }, + { + "epoch": 13.26, + "learning_rate": 5.801482392321618e-06, + "loss": 2.5124, + "step": 517600 + }, + { + "epoch": 13.26, + "learning_rate": 5.792943266045019e-06, + "loss": 2.2749, + "step": 517700 + }, + { + "epoch": 13.26, + "learning_rate": 5.784404139768419e-06, + "loss": 2.417, + "step": 517800 + }, + { + "epoch": 13.27, + "learning_rate": 5.77586501349182e-06, + "loss": 2.4673, + "step": 517900 + }, + { + "epoch": 13.27, + "learning_rate": 5.76732588721522e-06, + "loss": 2.343, + "step": 518000 + }, + { + "epoch": 13.27, + "learning_rate": 5.758786760938621e-06, + "loss": 2.3252, + "step": 518100 + }, + { + "epoch": 13.27, + "learning_rate": 5.750247634662022e-06, + "loss": 2.5362, + "step": 518200 + }, + { + "epoch": 13.28, + "learning_rate": 5.741708508385422e-06, + "loss": 2.3327, + "step": 518300 + }, + { + "epoch": 13.28, + "learning_rate": 5.733169382108823e-06, + "loss": 2.347, + "step": 518400 + }, + { + "epoch": 13.28, + "learning_rate": 5.724630255832224e-06, + "loss": 2.3024, + "step": 518500 + }, + { + "epoch": 13.29, + "learning_rate": 5.7160911295556245e-06, + "loss": 2.278, + "step": 518600 + }, + { + "epoch": 13.29, + "learning_rate": 5.707552003279025e-06, + "loss": 2.3998, + "step": 518700 + }, + { + "epoch": 13.29, + "learning_rate": 5.699012877002426e-06, + "loss": 2.2746, + "step": 518800 + }, + { + "epoch": 13.29, + "learning_rate": 5.690473750725827e-06, + "loss": 2.2974, + "step": 518900 + }, + { + "epoch": 13.3, + "learning_rate": 5.681934624449227e-06, + "loss": 2.3175, + "step": 519000 + }, + { + "epoch": 13.3, + "learning_rate": 5.673395498172627e-06, + "loss": 2.3615, + "step": 519100 + }, + { + "epoch": 13.3, + "learning_rate": 5.664856371896028e-06, + "loss": 2.2808, + "step": 519200 + }, + { + "epoch": 13.3, + "learning_rate": 5.6563172456194286e-06, + "loss": 2.4497, + "step": 519300 + }, + { + "epoch": 13.31, + "learning_rate": 5.647778119342828e-06, + "loss": 2.4673, + "step": 519400 + }, + { + "epoch": 13.31, + "learning_rate": 5.639238993066229e-06, + "loss": 2.1913, + "step": 519500 + }, + { + "epoch": 13.31, + "learning_rate": 5.63069986678963e-06, + "loss": 2.1462, + "step": 519600 + }, + { + "epoch": 13.31, + "learning_rate": 5.6221607405130305e-06, + "loss": 2.3437, + "step": 519700 + }, + { + "epoch": 13.32, + "learning_rate": 5.613621614236431e-06, + "loss": 2.2628, + "step": 519800 + }, + { + "epoch": 13.32, + "learning_rate": 5.605082487959832e-06, + "loss": 2.1694, + "step": 519900 + }, + { + "epoch": 13.32, + "learning_rate": 5.596543361683233e-06, + "loss": 2.3273, + "step": 520000 + }, + { + "epoch": 13.32, + "learning_rate": 5.588004235406633e-06, + "loss": 2.2511, + "step": 520100 + }, + { + "epoch": 13.33, + "learning_rate": 5.579465109130034e-06, + "loss": 2.3144, + "step": 520200 + }, + { + "epoch": 13.33, + "learning_rate": 5.570925982853435e-06, + "loss": 2.3039, + "step": 520300 + }, + { + "epoch": 13.33, + "learning_rate": 5.5623868565768354e-06, + "loss": 2.286, + "step": 520400 + }, + { + "epoch": 13.33, + "learning_rate": 5.553847730300236e-06, + "loss": 2.2385, + "step": 520500 + }, + { + "epoch": 13.34, + "learning_rate": 5.545308604023637e-06, + "loss": 2.127, + "step": 520600 + }, + { + "epoch": 13.34, + "learning_rate": 5.536769477747037e-06, + "loss": 2.3676, + "step": 520700 + }, + { + "epoch": 13.34, + "learning_rate": 5.528230351470437e-06, + "loss": 2.3496, + "step": 520800 + }, + { + "epoch": 13.34, + "learning_rate": 5.519691225193838e-06, + "loss": 2.3599, + "step": 520900 + }, + { + "epoch": 13.35, + "learning_rate": 5.511152098917239e-06, + "loss": 2.1873, + "step": 521000 + }, + { + "epoch": 13.35, + "learning_rate": 5.5026129726406395e-06, + "loss": 2.292, + "step": 521100 + }, + { + "epoch": 13.35, + "learning_rate": 5.49407384636404e-06, + "loss": 2.2371, + "step": 521200 + }, + { + "epoch": 13.35, + "learning_rate": 5.485534720087441e-06, + "loss": 2.3498, + "step": 521300 + }, + { + "epoch": 13.36, + "learning_rate": 5.476995593810842e-06, + "loss": 2.2792, + "step": 521400 + }, + { + "epoch": 13.36, + "learning_rate": 5.468456467534242e-06, + "loss": 2.2241, + "step": 521500 + }, + { + "epoch": 13.36, + "learning_rate": 5.459917341257643e-06, + "loss": 2.3171, + "step": 521600 + }, + { + "epoch": 13.36, + "learning_rate": 5.451378214981044e-06, + "loss": 2.3056, + "step": 521700 + }, + { + "epoch": 13.37, + "learning_rate": 5.442839088704444e-06, + "loss": 2.1791, + "step": 521800 + }, + { + "epoch": 13.37, + "learning_rate": 5.434299962427845e-06, + "loss": 2.2324, + "step": 521900 + }, + { + "epoch": 13.37, + "learning_rate": 5.425760836151245e-06, + "loss": 2.2704, + "step": 522000 + }, + { + "epoch": 13.37, + "learning_rate": 5.417221709874646e-06, + "loss": 2.3341, + "step": 522100 + }, + { + "epoch": 13.38, + "learning_rate": 5.408682583598046e-06, + "loss": 2.1766, + "step": 522200 + }, + { + "epoch": 13.38, + "learning_rate": 5.400143457321447e-06, + "loss": 2.1573, + "step": 522300 + }, + { + "epoch": 13.38, + "learning_rate": 5.391604331044848e-06, + "loss": 2.2716, + "step": 522400 + }, + { + "epoch": 13.39, + "learning_rate": 5.3830652047682485e-06, + "loss": 2.2186, + "step": 522500 + }, + { + "epoch": 13.39, + "learning_rate": 5.374526078491649e-06, + "loss": 2.4815, + "step": 522600 + }, + { + "epoch": 13.39, + "learning_rate": 5.36598695221505e-06, + "loss": 2.5104, + "step": 522700 + }, + { + "epoch": 13.39, + "learning_rate": 5.3574478259384506e-06, + "loss": 2.3255, + "step": 522800 + }, + { + "epoch": 13.4, + "learning_rate": 5.348908699661851e-06, + "loss": 2.3178, + "step": 522900 + }, + { + "epoch": 13.4, + "learning_rate": 5.340369573385252e-06, + "loss": 2.2776, + "step": 523000 + }, + { + "epoch": 13.4, + "learning_rate": 5.331830447108653e-06, + "loss": 2.1851, + "step": 523100 + }, + { + "epoch": 13.4, + "learning_rate": 5.323291320832053e-06, + "loss": 2.4296, + "step": 523200 + }, + { + "epoch": 13.41, + "learning_rate": 5.314752194555453e-06, + "loss": 2.2819, + "step": 523300 + }, + { + "epoch": 13.41, + "learning_rate": 5.306213068278854e-06, + "loss": 2.322, + "step": 523400 + }, + { + "epoch": 13.41, + "learning_rate": 5.297673942002255e-06, + "loss": 2.1871, + "step": 523500 + }, + { + "epoch": 13.41, + "learning_rate": 5.289134815725655e-06, + "loss": 2.382, + "step": 523600 + }, + { + "epoch": 13.42, + "learning_rate": 5.280595689449056e-06, + "loss": 2.51, + "step": 523700 + }, + { + "epoch": 13.42, + "learning_rate": 5.272056563172456e-06, + "loss": 2.5047, + "step": 523800 + }, + { + "epoch": 13.42, + "learning_rate": 5.263517436895857e-06, + "loss": 2.1496, + "step": 523900 + }, + { + "epoch": 13.42, + "learning_rate": 5.254978310619257e-06, + "loss": 2.3528, + "step": 524000 + }, + { + "epoch": 13.43, + "learning_rate": 5.246439184342658e-06, + "loss": 2.23, + "step": 524100 + }, + { + "epoch": 13.43, + "learning_rate": 5.237900058066059e-06, + "loss": 2.2, + "step": 524200 + }, + { + "epoch": 13.43, + "learning_rate": 5.229360931789459e-06, + "loss": 2.4509, + "step": 524300 + }, + { + "epoch": 13.43, + "learning_rate": 5.22082180551286e-06, + "loss": 2.218, + "step": 524400 + }, + { + "epoch": 13.44, + "learning_rate": 5.212282679236261e-06, + "loss": 2.1552, + "step": 524500 + }, + { + "epoch": 13.44, + "learning_rate": 5.2037435529596615e-06, + "loss": 2.4082, + "step": 524600 + }, + { + "epoch": 13.44, + "learning_rate": 5.195204426683062e-06, + "loss": 2.1625, + "step": 524700 + }, + { + "epoch": 13.44, + "learning_rate": 5.186665300406463e-06, + "loss": 2.3216, + "step": 524800 + }, + { + "epoch": 13.45, + "learning_rate": 5.178126174129863e-06, + "loss": 2.2809, + "step": 524900 + }, + { + "epoch": 13.45, + "learning_rate": 5.1695870478532634e-06, + "loss": 2.185, + "step": 525000 + }, + { + "epoch": 13.45, + "learning_rate": 5.161047921576664e-06, + "loss": 2.2444, + "step": 525100 + }, + { + "epoch": 13.45, + "learning_rate": 5.152508795300065e-06, + "loss": 2.3672, + "step": 525200 + }, + { + "epoch": 13.46, + "learning_rate": 5.1439696690234656e-06, + "loss": 2.2422, + "step": 525300 + }, + { + "epoch": 13.46, + "learning_rate": 5.135430542746866e-06, + "loss": 2.1887, + "step": 525400 + }, + { + "epoch": 13.46, + "learning_rate": 5.126891416470267e-06, + "loss": 2.2537, + "step": 525500 + }, + { + "epoch": 13.46, + "learning_rate": 5.118352290193668e-06, + "loss": 2.4022, + "step": 525600 + }, + { + "epoch": 13.47, + "learning_rate": 5.109813163917068e-06, + "loss": 2.3139, + "step": 525700 + }, + { + "epoch": 13.47, + "learning_rate": 5.101274037640469e-06, + "loss": 2.2153, + "step": 525800 + }, + { + "epoch": 13.47, + "learning_rate": 5.09273491136387e-06, + "loss": 2.3421, + "step": 525900 + }, + { + "epoch": 13.47, + "learning_rate": 5.0841957850872705e-06, + "loss": 2.3735, + "step": 526000 + }, + { + "epoch": 13.48, + "learning_rate": 5.075656658810671e-06, + "loss": 2.3653, + "step": 526100 + }, + { + "epoch": 13.48, + "learning_rate": 5.067117532534071e-06, + "loss": 2.3226, + "step": 526200 + }, + { + "epoch": 13.48, + "learning_rate": 5.058578406257472e-06, + "loss": 2.2424, + "step": 526300 + }, + { + "epoch": 13.48, + "learning_rate": 5.050039279980872e-06, + "loss": 2.319, + "step": 526400 + }, + { + "epoch": 13.49, + "learning_rate": 5.041500153704273e-06, + "loss": 2.1001, + "step": 526500 + }, + { + "epoch": 13.49, + "learning_rate": 5.032961027427674e-06, + "loss": 2.3242, + "step": 526600 + }, + { + "epoch": 13.49, + "learning_rate": 5.0244219011510745e-06, + "loss": 2.1981, + "step": 526700 + }, + { + "epoch": 13.5, + "learning_rate": 5.015882774874475e-06, + "loss": 2.3837, + "step": 526800 + }, + { + "epoch": 13.5, + "learning_rate": 5.007343648597876e-06, + "loss": 2.2706, + "step": 526900 + }, + { + "epoch": 13.5, + "learning_rate": 4.998804522321277e-06, + "loss": 2.3966, + "step": 527000 + }, + { + "epoch": 13.5, + "learning_rate": 4.990265396044677e-06, + "loss": 2.1872, + "step": 527100 + }, + { + "epoch": 13.51, + "learning_rate": 4.981726269768078e-06, + "loss": 2.3085, + "step": 527200 + }, + { + "epoch": 13.51, + "learning_rate": 4.973187143491479e-06, + "loss": 2.1895, + "step": 527300 + }, + { + "epoch": 13.51, + "learning_rate": 4.9646480172148794e-06, + "loss": 2.2384, + "step": 527400 + }, + { + "epoch": 13.51, + "learning_rate": 4.956108890938279e-06, + "loss": 2.3372, + "step": 527500 + }, + { + "epoch": 13.52, + "learning_rate": 4.94756976466168e-06, + "loss": 2.2382, + "step": 527600 + }, + { + "epoch": 13.52, + "learning_rate": 4.939030638385081e-06, + "loss": 2.4529, + "step": 527700 + }, + { + "epoch": 13.52, + "learning_rate": 4.930491512108481e-06, + "loss": 2.2532, + "step": 527800 + }, + { + "epoch": 13.52, + "learning_rate": 4.921952385831882e-06, + "loss": 2.386, + "step": 527900 + }, + { + "epoch": 13.53, + "learning_rate": 4.913413259555283e-06, + "loss": 2.3378, + "step": 528000 + }, + { + "epoch": 13.53, + "learning_rate": 4.9048741332786835e-06, + "loss": 2.4536, + "step": 528100 + }, + { + "epoch": 13.53, + "learning_rate": 4.896335007002083e-06, + "loss": 2.3369, + "step": 528200 + }, + { + "epoch": 13.53, + "learning_rate": 4.887795880725484e-06, + "loss": 2.4021, + "step": 528300 + }, + { + "epoch": 13.54, + "learning_rate": 4.879256754448885e-06, + "loss": 2.283, + "step": 528400 + }, + { + "epoch": 13.54, + "learning_rate": 4.8707176281722854e-06, + "loss": 2.2235, + "step": 528500 + }, + { + "epoch": 13.54, + "learning_rate": 4.862178501895686e-06, + "loss": 2.2042, + "step": 528600 + }, + { + "epoch": 13.54, + "learning_rate": 4.853639375619087e-06, + "loss": 2.1249, + "step": 528700 + }, + { + "epoch": 13.55, + "learning_rate": 4.8451002493424876e-06, + "loss": 2.469, + "step": 528800 + }, + { + "epoch": 13.55, + "learning_rate": 4.836561123065888e-06, + "loss": 2.3044, + "step": 528900 + }, + { + "epoch": 13.55, + "learning_rate": 4.828021996789289e-06, + "loss": 2.4205, + "step": 529000 + }, + { + "epoch": 13.55, + "learning_rate": 4.819482870512689e-06, + "loss": 2.3678, + "step": 529100 + }, + { + "epoch": 13.56, + "learning_rate": 4.8109437442360895e-06, + "loss": 2.2873, + "step": 529200 + }, + { + "epoch": 13.56, + "learning_rate": 4.80240461795949e-06, + "loss": 2.2397, + "step": 529300 + }, + { + "epoch": 13.56, + "learning_rate": 4.793865491682891e-06, + "loss": 2.2666, + "step": 529400 + }, + { + "epoch": 13.56, + "learning_rate": 4.785326365406292e-06, + "loss": 2.1234, + "step": 529500 + }, + { + "epoch": 13.57, + "learning_rate": 4.776787239129692e-06, + "loss": 2.297, + "step": 529600 + }, + { + "epoch": 13.57, + "learning_rate": 4.768248112853093e-06, + "loss": 2.2197, + "step": 529700 + }, + { + "epoch": 13.57, + "learning_rate": 4.759708986576494e-06, + "loss": 2.3912, + "step": 529800 + }, + { + "epoch": 13.57, + "learning_rate": 4.751169860299894e-06, + "loss": 2.2514, + "step": 529900 + }, + { + "epoch": 13.58, + "learning_rate": 4.742630734023295e-06, + "loss": 2.2146, + "step": 530000 + }, + { + "epoch": 13.58, + "learning_rate": 4.734091607746696e-06, + "loss": 2.2581, + "step": 530100 + }, + { + "epoch": 13.58, + "learning_rate": 4.7255524814700965e-06, + "loss": 2.2877, + "step": 530200 + }, + { + "epoch": 13.58, + "learning_rate": 4.717013355193497e-06, + "loss": 2.277, + "step": 530300 + }, + { + "epoch": 13.59, + "learning_rate": 4.708474228916897e-06, + "loss": 2.1507, + "step": 530400 + }, + { + "epoch": 13.59, + "learning_rate": 4.699935102640298e-06, + "loss": 2.3655, + "step": 530500 + }, + { + "epoch": 13.59, + "learning_rate": 4.6913959763636985e-06, + "loss": 2.3029, + "step": 530600 + }, + { + "epoch": 13.6, + "learning_rate": 4.682856850087099e-06, + "loss": 2.3902, + "step": 530700 + }, + { + "epoch": 13.6, + "learning_rate": 4.6743177238105e-06, + "loss": 2.4023, + "step": 530800 + }, + { + "epoch": 13.6, + "learning_rate": 4.665778597533901e-06, + "loss": 2.3543, + "step": 530900 + }, + { + "epoch": 13.6, + "learning_rate": 4.657239471257301e-06, + "loss": 2.4241, + "step": 531000 + }, + { + "epoch": 13.61, + "learning_rate": 4.648700344980702e-06, + "loss": 2.4799, + "step": 531100 + }, + { + "epoch": 13.61, + "learning_rate": 4.640161218704103e-06, + "loss": 2.2627, + "step": 531200 + }, + { + "epoch": 13.61, + "learning_rate": 4.631622092427503e-06, + "loss": 2.2909, + "step": 531300 + }, + { + "epoch": 13.61, + "learning_rate": 4.623082966150904e-06, + "loss": 2.3981, + "step": 531400 + }, + { + "epoch": 13.62, + "learning_rate": 4.614543839874305e-06, + "loss": 2.2313, + "step": 531500 + }, + { + "epoch": 13.62, + "learning_rate": 4.6060047135977055e-06, + "loss": 2.2898, + "step": 531600 + }, + { + "epoch": 13.62, + "learning_rate": 4.597465587321105e-06, + "loss": 2.0953, + "step": 531700 + }, + { + "epoch": 13.62, + "learning_rate": 4.588926461044506e-06, + "loss": 2.3292, + "step": 531800 + }, + { + "epoch": 13.63, + "learning_rate": 4.580387334767907e-06, + "loss": 2.2417, + "step": 531900 + }, + { + "epoch": 13.63, + "learning_rate": 4.5718482084913074e-06, + "loss": 2.2797, + "step": 532000 + }, + { + "epoch": 13.63, + "learning_rate": 4.563309082214708e-06, + "loss": 2.4571, + "step": 532100 + }, + { + "epoch": 13.63, + "learning_rate": 4.554769955938109e-06, + "loss": 2.3237, + "step": 532200 + }, + { + "epoch": 13.64, + "learning_rate": 4.5462308296615096e-06, + "loss": 2.2959, + "step": 532300 + }, + { + "epoch": 13.64, + "learning_rate": 4.53769170338491e-06, + "loss": 2.474, + "step": 532400 + }, + { + "epoch": 13.64, + "learning_rate": 4.529152577108311e-06, + "loss": 2.2593, + "step": 532500 + }, + { + "epoch": 13.64, + "learning_rate": 4.520613450831711e-06, + "loss": 2.4216, + "step": 532600 + }, + { + "epoch": 13.65, + "learning_rate": 4.5120743245551115e-06, + "loss": 2.4381, + "step": 532700 + }, + { + "epoch": 13.65, + "learning_rate": 4.503535198278512e-06, + "loss": 2.3255, + "step": 532800 + }, + { + "epoch": 13.65, + "learning_rate": 4.494996072001913e-06, + "loss": 2.2518, + "step": 532900 + }, + { + "epoch": 13.65, + "learning_rate": 4.486456945725314e-06, + "loss": 2.1913, + "step": 533000 + }, + { + "epoch": 13.66, + "learning_rate": 4.477917819448714e-06, + "loss": 2.2473, + "step": 533100 + }, + { + "epoch": 13.66, + "learning_rate": 4.469378693172115e-06, + "loss": 2.4085, + "step": 533200 + }, + { + "epoch": 13.66, + "learning_rate": 4.460839566895515e-06, + "loss": 2.2575, + "step": 533300 + }, + { + "epoch": 13.66, + "learning_rate": 4.4523004406189156e-06, + "loss": 2.4073, + "step": 533400 + }, + { + "epoch": 13.67, + "learning_rate": 4.443761314342316e-06, + "loss": 2.356, + "step": 533500 + }, + { + "epoch": 13.67, + "learning_rate": 4.435222188065717e-06, + "loss": 2.306, + "step": 533600 + }, + { + "epoch": 13.67, + "learning_rate": 4.426683061789118e-06, + "loss": 2.1347, + "step": 533700 + }, + { + "epoch": 13.67, + "learning_rate": 4.418143935512518e-06, + "loss": 2.2145, + "step": 533800 + }, + { + "epoch": 13.68, + "learning_rate": 4.409604809235919e-06, + "loss": 2.2796, + "step": 533900 + }, + { + "epoch": 13.68, + "learning_rate": 4.40106568295932e-06, + "loss": 2.3241, + "step": 534000 + }, + { + "epoch": 13.68, + "learning_rate": 4.3925265566827205e-06, + "loss": 2.2354, + "step": 534100 + }, + { + "epoch": 13.68, + "learning_rate": 4.383987430406121e-06, + "loss": 2.1824, + "step": 534200 + }, + { + "epoch": 13.69, + "learning_rate": 4.375448304129522e-06, + "loss": 2.35, + "step": 534300 + }, + { + "epoch": 13.69, + "learning_rate": 4.366909177852923e-06, + "loss": 2.3045, + "step": 534400 + }, + { + "epoch": 13.69, + "learning_rate": 4.358370051576323e-06, + "loss": 2.3268, + "step": 534500 + }, + { + "epoch": 13.7, + "learning_rate": 4.349830925299723e-06, + "loss": 2.2335, + "step": 534600 + }, + { + "epoch": 13.7, + "learning_rate": 4.341291799023124e-06, + "loss": 2.2382, + "step": 534700 + }, + { + "epoch": 13.7, + "learning_rate": 4.3327526727465245e-06, + "loss": 2.3745, + "step": 534800 + }, + { + "epoch": 13.7, + "learning_rate": 4.324213546469925e-06, + "loss": 2.4012, + "step": 534900 + }, + { + "epoch": 13.71, + "learning_rate": 4.315674420193326e-06, + "loss": 2.3895, + "step": 535000 + }, + { + "epoch": 13.71, + "learning_rate": 4.307135293916727e-06, + "loss": 2.276, + "step": 535100 + }, + { + "epoch": 13.71, + "learning_rate": 4.298596167640127e-06, + "loss": 2.3365, + "step": 535200 + }, + { + "epoch": 13.71, + "learning_rate": 4.290057041363528e-06, + "loss": 2.1478, + "step": 535300 + }, + { + "epoch": 13.72, + "learning_rate": 4.281517915086929e-06, + "loss": 2.2136, + "step": 535400 + }, + { + "epoch": 13.72, + "learning_rate": 4.2729787888103294e-06, + "loss": 2.1603, + "step": 535500 + }, + { + "epoch": 13.72, + "learning_rate": 4.26443966253373e-06, + "loss": 2.2522, + "step": 535600 + }, + { + "epoch": 13.72, + "learning_rate": 4.255900536257131e-06, + "loss": 2.3462, + "step": 535700 + }, + { + "epoch": 13.73, + "learning_rate": 4.2473614099805316e-06, + "loss": 2.4739, + "step": 535800 + }, + { + "epoch": 13.73, + "learning_rate": 4.238822283703931e-06, + "loss": 2.2846, + "step": 535900 + }, + { + "epoch": 13.73, + "learning_rate": 4.230283157427332e-06, + "loss": 2.3267, + "step": 536000 + }, + { + "epoch": 13.73, + "learning_rate": 4.221744031150733e-06, + "loss": 2.2946, + "step": 536100 + }, + { + "epoch": 13.74, + "learning_rate": 4.2132049048741335e-06, + "loss": 2.1301, + "step": 536200 + }, + { + "epoch": 13.74, + "learning_rate": 4.204665778597534e-06, + "loss": 2.4257, + "step": 536300 + }, + { + "epoch": 13.74, + "learning_rate": 4.196126652320935e-06, + "loss": 2.2667, + "step": 536400 + }, + { + "epoch": 13.74, + "learning_rate": 4.187587526044336e-06, + "loss": 2.2935, + "step": 536500 + }, + { + "epoch": 13.75, + "learning_rate": 4.179048399767736e-06, + "loss": 2.4029, + "step": 536600 + }, + { + "epoch": 13.75, + "learning_rate": 4.170509273491137e-06, + "loss": 2.204, + "step": 536700 + }, + { + "epoch": 13.75, + "learning_rate": 4.161970147214538e-06, + "loss": 2.4656, + "step": 536800 + }, + { + "epoch": 13.75, + "learning_rate": 4.1534310209379376e-06, + "loss": 2.3937, + "step": 536900 + }, + { + "epoch": 13.76, + "learning_rate": 4.144891894661338e-06, + "loss": 2.2908, + "step": 537000 + }, + { + "epoch": 13.76, + "learning_rate": 4.136352768384739e-06, + "loss": 2.2052, + "step": 537100 + }, + { + "epoch": 13.76, + "learning_rate": 4.12781364210814e-06, + "loss": 2.3669, + "step": 537200 + }, + { + "epoch": 13.76, + "learning_rate": 4.11927451583154e-06, + "loss": 2.2515, + "step": 537300 + }, + { + "epoch": 13.77, + "learning_rate": 4.110735389554941e-06, + "loss": 2.2041, + "step": 537400 + }, + { + "epoch": 13.77, + "learning_rate": 4.102196263278341e-06, + "loss": 2.2503, + "step": 537500 + }, + { + "epoch": 13.77, + "learning_rate": 4.093657137001742e-06, + "loss": 2.3934, + "step": 537600 + }, + { + "epoch": 13.77, + "learning_rate": 4.085118010725142e-06, + "loss": 2.3733, + "step": 537700 + }, + { + "epoch": 13.78, + "learning_rate": 4.076578884448543e-06, + "loss": 2.3535, + "step": 537800 + }, + { + "epoch": 13.78, + "learning_rate": 4.068039758171944e-06, + "loss": 2.1226, + "step": 537900 + }, + { + "epoch": 13.78, + "learning_rate": 4.0595006318953444e-06, + "loss": 2.3963, + "step": 538000 + }, + { + "epoch": 13.78, + "learning_rate": 4.050961505618745e-06, + "loss": 2.3109, + "step": 538100 + }, + { + "epoch": 13.79, + "learning_rate": 4.042422379342146e-06, + "loss": 2.2519, + "step": 538200 + }, + { + "epoch": 13.79, + "learning_rate": 4.0338832530655465e-06, + "loss": 2.2264, + "step": 538300 + }, + { + "epoch": 13.79, + "learning_rate": 4.025344126788947e-06, + "loss": 2.3696, + "step": 538400 + }, + { + "epoch": 13.79, + "learning_rate": 4.016805000512348e-06, + "loss": 2.3853, + "step": 538500 + }, + { + "epoch": 13.8, + "learning_rate": 4.008265874235749e-06, + "loss": 2.243, + "step": 538600 + }, + { + "epoch": 13.8, + "learning_rate": 3.999726747959149e-06, + "loss": 2.1949, + "step": 538700 + }, + { + "epoch": 13.8, + "learning_rate": 3.991187621682549e-06, + "loss": 2.4244, + "step": 538800 + }, + { + "epoch": 13.81, + "learning_rate": 3.98264849540595e-06, + "loss": 2.3751, + "step": 538900 + }, + { + "epoch": 13.81, + "learning_rate": 3.974109369129351e-06, + "loss": 2.3418, + "step": 539000 + }, + { + "epoch": 13.81, + "learning_rate": 3.965570242852751e-06, + "loss": 2.3028, + "step": 539100 + }, + { + "epoch": 13.81, + "learning_rate": 3.957031116576152e-06, + "loss": 2.3154, + "step": 539200 + }, + { + "epoch": 13.82, + "learning_rate": 3.948491990299553e-06, + "loss": 2.2674, + "step": 539300 + }, + { + "epoch": 13.82, + "learning_rate": 3.939952864022953e-06, + "loss": 2.3125, + "step": 539400 + }, + { + "epoch": 13.82, + "learning_rate": 3.931413737746354e-06, + "loss": 2.4033, + "step": 539500 + }, + { + "epoch": 13.82, + "learning_rate": 3.922874611469755e-06, + "loss": 2.2926, + "step": 539600 + }, + { + "epoch": 13.83, + "learning_rate": 3.9143354851931555e-06, + "loss": 2.4927, + "step": 539700 + }, + { + "epoch": 13.83, + "learning_rate": 3.905796358916556e-06, + "loss": 2.3426, + "step": 539800 + }, + { + "epoch": 13.83, + "learning_rate": 3.897257232639957e-06, + "loss": 2.3957, + "step": 539900 + }, + { + "epoch": 13.83, + "learning_rate": 3.888718106363358e-06, + "loss": 2.3603, + "step": 540000 + }, + { + "epoch": 13.84, + "learning_rate": 3.8801789800867575e-06, + "loss": 2.2371, + "step": 540100 + }, + { + "epoch": 13.84, + "learning_rate": 3.871639853810158e-06, + "loss": 2.393, + "step": 540200 + }, + { + "epoch": 13.84, + "learning_rate": 3.863100727533559e-06, + "loss": 2.1292, + "step": 540300 + }, + { + "epoch": 13.84, + "learning_rate": 3.8545616012569596e-06, + "loss": 2.3997, + "step": 540400 + }, + { + "epoch": 13.85, + "learning_rate": 3.84602247498036e-06, + "loss": 2.4442, + "step": 540500 + }, + { + "epoch": 13.85, + "learning_rate": 3.837483348703761e-06, + "loss": 2.1941, + "step": 540600 + }, + { + "epoch": 13.85, + "learning_rate": 3.828944222427162e-06, + "loss": 2.3835, + "step": 540700 + }, + { + "epoch": 13.85, + "learning_rate": 3.820405096150562e-06, + "loss": 2.225, + "step": 540800 + }, + { + "epoch": 13.86, + "learning_rate": 3.811865969873963e-06, + "loss": 2.2939, + "step": 540900 + }, + { + "epoch": 13.86, + "learning_rate": 3.8033268435973638e-06, + "loss": 2.1806, + "step": 541000 + }, + { + "epoch": 13.86, + "learning_rate": 3.794787717320764e-06, + "loss": 2.4158, + "step": 541100 + }, + { + "epoch": 13.86, + "learning_rate": 3.7862485910441648e-06, + "loss": 2.3077, + "step": 541200 + }, + { + "epoch": 13.87, + "learning_rate": 3.7777094647675646e-06, + "loss": 2.2717, + "step": 541300 + }, + { + "epoch": 13.87, + "learning_rate": 3.7691703384909653e-06, + "loss": 2.4561, + "step": 541400 + }, + { + "epoch": 13.87, + "learning_rate": 3.760631212214366e-06, + "loss": 2.2191, + "step": 541500 + }, + { + "epoch": 13.87, + "learning_rate": 3.7520920859377667e-06, + "loss": 2.2821, + "step": 541600 + }, + { + "epoch": 13.88, + "learning_rate": 3.7435529596611674e-06, + "loss": 2.1932, + "step": 541700 + }, + { + "epoch": 13.88, + "learning_rate": 3.735013833384568e-06, + "loss": 2.1749, + "step": 541800 + }, + { + "epoch": 13.88, + "learning_rate": 3.726474707107969e-06, + "loss": 2.3083, + "step": 541900 + }, + { + "epoch": 13.88, + "learning_rate": 3.717935580831369e-06, + "loss": 2.2537, + "step": 542000 + }, + { + "epoch": 13.89, + "learning_rate": 3.70939645455477e-06, + "loss": 2.2607, + "step": 542100 + }, + { + "epoch": 13.89, + "learning_rate": 3.7008573282781705e-06, + "loss": 2.3312, + "step": 542200 + }, + { + "epoch": 13.89, + "learning_rate": 3.692318202001571e-06, + "loss": 2.3231, + "step": 542300 + }, + { + "epoch": 13.89, + "learning_rate": 3.683779075724972e-06, + "loss": 2.2841, + "step": 542400 + }, + { + "epoch": 13.9, + "learning_rate": 3.6752399494483726e-06, + "loss": 2.1106, + "step": 542500 + }, + { + "epoch": 13.9, + "learning_rate": 3.6667008231717733e-06, + "loss": 2.5235, + "step": 542600 + }, + { + "epoch": 13.9, + "learning_rate": 3.6581616968951736e-06, + "loss": 2.3049, + "step": 542700 + }, + { + "epoch": 13.91, + "learning_rate": 3.6496225706185743e-06, + "loss": 2.1696, + "step": 542800 + }, + { + "epoch": 13.91, + "learning_rate": 3.641083444341975e-06, + "loss": 2.3529, + "step": 542900 + }, + { + "epoch": 13.91, + "learning_rate": 3.6325443180653757e-06, + "loss": 2.5494, + "step": 543000 + }, + { + "epoch": 13.91, + "learning_rate": 3.6240051917887764e-06, + "loss": 2.4923, + "step": 543100 + }, + { + "epoch": 13.92, + "learning_rate": 3.615466065512177e-06, + "loss": 2.2472, + "step": 543200 + }, + { + "epoch": 13.92, + "learning_rate": 3.6069269392355774e-06, + "loss": 2.1455, + "step": 543300 + }, + { + "epoch": 13.92, + "learning_rate": 3.598387812958978e-06, + "loss": 2.2442, + "step": 543400 + }, + { + "epoch": 13.92, + "learning_rate": 3.5898486866823788e-06, + "loss": 2.2608, + "step": 543500 + }, + { + "epoch": 13.93, + "learning_rate": 3.5813095604057795e-06, + "loss": 2.4307, + "step": 543600 + }, + { + "epoch": 13.93, + "learning_rate": 3.57277043412918e-06, + "loss": 2.5147, + "step": 543700 + }, + { + "epoch": 13.93, + "learning_rate": 3.564231307852581e-06, + "loss": 2.2385, + "step": 543800 + }, + { + "epoch": 13.93, + "learning_rate": 3.5556921815759816e-06, + "loss": 2.1395, + "step": 543900 + }, + { + "epoch": 13.94, + "learning_rate": 3.547153055299382e-06, + "loss": 2.3087, + "step": 544000 + }, + { + "epoch": 13.94, + "learning_rate": 3.5386139290227826e-06, + "loss": 2.2105, + "step": 544100 + }, + { + "epoch": 13.94, + "learning_rate": 3.5300748027461833e-06, + "loss": 2.3762, + "step": 544200 + }, + { + "epoch": 13.94, + "learning_rate": 3.521535676469584e-06, + "loss": 2.297, + "step": 544300 + }, + { + "epoch": 13.95, + "learning_rate": 3.5129965501929847e-06, + "loss": 2.3126, + "step": 544400 + }, + { + "epoch": 13.95, + "learning_rate": 3.5044574239163854e-06, + "loss": 2.1932, + "step": 544500 + }, + { + "epoch": 13.95, + "learning_rate": 3.4959182976397856e-06, + "loss": 2.3053, + "step": 544600 + }, + { + "epoch": 13.95, + "learning_rate": 3.4873791713631863e-06, + "loss": 2.4063, + "step": 544700 + }, + { + "epoch": 13.96, + "learning_rate": 3.478840045086587e-06, + "loss": 2.2226, + "step": 544800 + }, + { + "epoch": 13.96, + "learning_rate": 3.4703009188099877e-06, + "loss": 2.3015, + "step": 544900 + }, + { + "epoch": 13.96, + "learning_rate": 3.4617617925333884e-06, + "loss": 2.0615, + "step": 545000 + }, + { + "epoch": 13.96, + "learning_rate": 3.453222666256789e-06, + "loss": 2.3217, + "step": 545100 + }, + { + "epoch": 13.97, + "learning_rate": 3.44468353998019e-06, + "loss": 2.2258, + "step": 545200 + }, + { + "epoch": 13.97, + "learning_rate": 3.43614441370359e-06, + "loss": 2.5063, + "step": 545300 + }, + { + "epoch": 13.97, + "learning_rate": 3.427605287426991e-06, + "loss": 2.1059, + "step": 545400 + }, + { + "epoch": 13.97, + "learning_rate": 3.4190661611503915e-06, + "loss": 2.294, + "step": 545500 + }, + { + "epoch": 13.98, + "learning_rate": 3.4105270348737922e-06, + "loss": 2.1908, + "step": 545600 + }, + { + "epoch": 13.98, + "learning_rate": 3.401987908597192e-06, + "loss": 2.4956, + "step": 545700 + }, + { + "epoch": 13.98, + "learning_rate": 3.3934487823205928e-06, + "loss": 2.4304, + "step": 545800 + }, + { + "epoch": 13.98, + "learning_rate": 3.3849096560439935e-06, + "loss": 2.228, + "step": 545900 + }, + { + "epoch": 13.99, + "learning_rate": 3.376370529767394e-06, + "loss": 2.1937, + "step": 546000 + }, + { + "epoch": 13.99, + "learning_rate": 3.367831403490795e-06, + "loss": 2.1773, + "step": 546100 + }, + { + "epoch": 13.99, + "learning_rate": 3.359292277214195e-06, + "loss": 2.2041, + "step": 546200 + }, + { + "epoch": 13.99, + "learning_rate": 3.350753150937596e-06, + "loss": 2.359, + "step": 546300 + }, + { + "epoch": 14.0, + "learning_rate": 3.3422140246609966e-06, + "loss": 2.4075, + "step": 546400 + }, + { + "epoch": 14.0, + "learning_rate": 3.3336748983843973e-06, + "loss": 2.3136, + "step": 546500 + }, + { + "epoch": 14.0, + "learning_rate": 3.325135772107798e-06, + "loss": 2.3028, + "step": 546600 + }, + { + "epoch": 14.01, + "learning_rate": 3.3165966458311987e-06, + "loss": 2.3709, + "step": 546700 + }, + { + "epoch": 14.01, + "learning_rate": 3.3080575195545994e-06, + "loss": 2.084, + "step": 546800 + }, + { + "epoch": 14.01, + "learning_rate": 3.2995183932779996e-06, + "loss": 2.4734, + "step": 546900 + }, + { + "epoch": 14.01, + "learning_rate": 3.2909792670014003e-06, + "loss": 2.1511, + "step": 547000 + }, + { + "epoch": 14.02, + "learning_rate": 3.282440140724801e-06, + "loss": 2.2915, + "step": 547100 + }, + { + "epoch": 14.02, + "learning_rate": 3.2739010144482017e-06, + "loss": 2.2365, + "step": 547200 + }, + { + "epoch": 14.02, + "learning_rate": 3.2653618881716024e-06, + "loss": 2.3072, + "step": 547300 + }, + { + "epoch": 14.02, + "learning_rate": 3.256822761895003e-06, + "loss": 2.2433, + "step": 547400 + }, + { + "epoch": 14.03, + "learning_rate": 3.2482836356184034e-06, + "loss": 2.3882, + "step": 547500 + }, + { + "epoch": 14.03, + "learning_rate": 3.239744509341804e-06, + "loss": 2.1129, + "step": 547600 + }, + { + "epoch": 14.03, + "learning_rate": 3.231205383065205e-06, + "loss": 2.3175, + "step": 547700 + }, + { + "epoch": 14.03, + "learning_rate": 3.2226662567886055e-06, + "loss": 2.402, + "step": 547800 + }, + { + "epoch": 14.04, + "learning_rate": 3.2141271305120062e-06, + "loss": 2.2457, + "step": 547900 + }, + { + "epoch": 14.04, + "learning_rate": 3.205588004235407e-06, + "loss": 2.285, + "step": 548000 + }, + { + "epoch": 14.04, + "learning_rate": 3.1970488779588076e-06, + "loss": 2.2699, + "step": 548100 + }, + { + "epoch": 14.04, + "learning_rate": 3.188509751682208e-06, + "loss": 2.1153, + "step": 548200 + }, + { + "epoch": 14.05, + "learning_rate": 3.1799706254056086e-06, + "loss": 2.2285, + "step": 548300 + }, + { + "epoch": 14.05, + "learning_rate": 3.1714314991290093e-06, + "loss": 2.2391, + "step": 548400 + }, + { + "epoch": 14.05, + "learning_rate": 3.16289237285241e-06, + "loss": 2.4201, + "step": 548500 + }, + { + "epoch": 14.05, + "learning_rate": 3.1543532465758107e-06, + "loss": 2.2739, + "step": 548600 + }, + { + "epoch": 14.06, + "learning_rate": 3.1458141202992114e-06, + "loss": 2.3189, + "step": 548700 + }, + { + "epoch": 14.06, + "learning_rate": 3.137274994022612e-06, + "loss": 2.39, + "step": 548800 + }, + { + "epoch": 14.06, + "learning_rate": 3.1287358677460124e-06, + "loss": 2.2188, + "step": 548900 + }, + { + "epoch": 14.06, + "learning_rate": 3.1201967414694127e-06, + "loss": 2.2681, + "step": 549000 + }, + { + "epoch": 14.07, + "learning_rate": 3.1116576151928134e-06, + "loss": 2.262, + "step": 549100 + }, + { + "epoch": 14.07, + "learning_rate": 3.103118488916214e-06, + "loss": 2.3291, + "step": 549200 + }, + { + "epoch": 14.07, + "learning_rate": 3.0945793626396148e-06, + "loss": 2.1329, + "step": 549300 + }, + { + "epoch": 14.07, + "learning_rate": 3.0860402363630155e-06, + "loss": 2.0695, + "step": 549400 + }, + { + "epoch": 14.08, + "learning_rate": 3.077501110086416e-06, + "loss": 2.5006, + "step": 549500 + }, + { + "epoch": 14.08, + "learning_rate": 3.0689619838098165e-06, + "loss": 2.2184, + "step": 549600 + }, + { + "epoch": 14.08, + "learning_rate": 3.060422857533217e-06, + "loss": 2.088, + "step": 549700 + }, + { + "epoch": 14.08, + "learning_rate": 3.051883731256618e-06, + "loss": 2.2616, + "step": 549800 + }, + { + "epoch": 14.09, + "learning_rate": 3.0433446049800186e-06, + "loss": 2.2769, + "step": 549900 + }, + { + "epoch": 14.09, + "learning_rate": 3.0348054787034193e-06, + "loss": 2.3596, + "step": 550000 + }, + { + "epoch": 14.09, + "learning_rate": 3.02626635242682e-06, + "loss": 2.2853, + "step": 550100 + }, + { + "epoch": 14.09, + "learning_rate": 3.0177272261502207e-06, + "loss": 2.2473, + "step": 550200 + }, + { + "epoch": 14.1, + "learning_rate": 3.009188099873621e-06, + "loss": 2.3455, + "step": 550300 + }, + { + "epoch": 14.1, + "learning_rate": 3.0006489735970216e-06, + "loss": 2.3338, + "step": 550400 + }, + { + "epoch": 14.1, + "learning_rate": 2.9921098473204223e-06, + "loss": 2.2896, + "step": 550500 + }, + { + "epoch": 14.1, + "learning_rate": 2.983570721043823e-06, + "loss": 2.2142, + "step": 550600 + }, + { + "epoch": 14.11, + "learning_rate": 2.9750315947672237e-06, + "loss": 2.3097, + "step": 550700 + }, + { + "epoch": 14.11, + "learning_rate": 2.9664924684906244e-06, + "loss": 2.2682, + "step": 550800 + }, + { + "epoch": 14.11, + "learning_rate": 2.957953342214025e-06, + "loss": 2.301, + "step": 550900 + }, + { + "epoch": 14.12, + "learning_rate": 2.9494142159374254e-06, + "loss": 2.5299, + "step": 551000 + }, + { + "epoch": 14.12, + "learning_rate": 2.940875089660826e-06, + "loss": 2.3803, + "step": 551100 + }, + { + "epoch": 14.12, + "learning_rate": 2.9323359633842264e-06, + "loss": 2.1536, + "step": 551200 + }, + { + "epoch": 14.12, + "learning_rate": 2.923796837107627e-06, + "loss": 2.3938, + "step": 551300 + }, + { + "epoch": 14.13, + "learning_rate": 2.915257710831028e-06, + "loss": 2.2973, + "step": 551400 + }, + { + "epoch": 14.13, + "learning_rate": 2.9067185845544285e-06, + "loss": 2.2337, + "step": 551500 + }, + { + "epoch": 14.13, + "learning_rate": 2.898179458277829e-06, + "loss": 2.2885, + "step": 551600 + }, + { + "epoch": 14.13, + "learning_rate": 2.8896403320012295e-06, + "loss": 2.3434, + "step": 551700 + }, + { + "epoch": 14.14, + "learning_rate": 2.88110120572463e-06, + "loss": 2.3131, + "step": 551800 + }, + { + "epoch": 14.14, + "learning_rate": 2.872562079448031e-06, + "loss": 2.3793, + "step": 551900 + }, + { + "epoch": 14.14, + "learning_rate": 2.8640229531714316e-06, + "loss": 2.0242, + "step": 552000 + }, + { + "epoch": 14.14, + "learning_rate": 2.8554838268948323e-06, + "loss": 2.3354, + "step": 552100 + }, + { + "epoch": 14.15, + "learning_rate": 2.846944700618233e-06, + "loss": 2.2313, + "step": 552200 + }, + { + "epoch": 14.15, + "learning_rate": 2.8384055743416337e-06, + "loss": 2.346, + "step": 552300 + }, + { + "epoch": 14.15, + "learning_rate": 2.829866448065034e-06, + "loss": 2.3449, + "step": 552400 + }, + { + "epoch": 14.15, + "learning_rate": 2.8213273217884347e-06, + "loss": 2.2808, + "step": 552500 + }, + { + "epoch": 14.16, + "learning_rate": 2.8127881955118354e-06, + "loss": 2.205, + "step": 552600 + }, + { + "epoch": 14.16, + "learning_rate": 2.804249069235236e-06, + "loss": 2.3683, + "step": 552700 + }, + { + "epoch": 14.16, + "learning_rate": 2.7957099429586368e-06, + "loss": 2.3848, + "step": 552800 + }, + { + "epoch": 14.16, + "learning_rate": 2.7871708166820375e-06, + "loss": 2.3793, + "step": 552900 + }, + { + "epoch": 14.17, + "learning_rate": 2.778631690405438e-06, + "loss": 2.4302, + "step": 553000 + }, + { + "epoch": 14.17, + "learning_rate": 2.7700925641288385e-06, + "loss": 2.5048, + "step": 553100 + }, + { + "epoch": 14.17, + "learning_rate": 2.761553437852239e-06, + "loss": 2.3093, + "step": 553200 + }, + { + "epoch": 14.17, + "learning_rate": 2.75301431157564e-06, + "loss": 2.1302, + "step": 553300 + }, + { + "epoch": 14.18, + "learning_rate": 2.74447518529904e-06, + "loss": 2.5015, + "step": 553400 + }, + { + "epoch": 14.18, + "learning_rate": 2.735936059022441e-06, + "loss": 2.4615, + "step": 553500 + }, + { + "epoch": 14.18, + "learning_rate": 2.7273969327458415e-06, + "loss": 2.5099, + "step": 553600 + }, + { + "epoch": 14.18, + "learning_rate": 2.7188578064692422e-06, + "loss": 2.2814, + "step": 553700 + }, + { + "epoch": 14.19, + "learning_rate": 2.7103186801926425e-06, + "loss": 2.1708, + "step": 553800 + }, + { + "epoch": 14.19, + "learning_rate": 2.7017795539160432e-06, + "loss": 2.3461, + "step": 553900 + }, + { + "epoch": 14.19, + "learning_rate": 2.693240427639444e-06, + "loss": 2.1178, + "step": 554000 + }, + { + "epoch": 14.19, + "learning_rate": 2.6847013013628446e-06, + "loss": 2.2476, + "step": 554100 + }, + { + "epoch": 14.2, + "learning_rate": 2.6761621750862453e-06, + "loss": 2.1886, + "step": 554200 + }, + { + "epoch": 14.2, + "learning_rate": 2.667623048809646e-06, + "loss": 2.235, + "step": 554300 + }, + { + "epoch": 14.2, + "learning_rate": 2.6590839225330467e-06, + "loss": 2.3423, + "step": 554400 + }, + { + "epoch": 14.2, + "learning_rate": 2.650544796256447e-06, + "loss": 2.3436, + "step": 554500 + }, + { + "epoch": 14.21, + "learning_rate": 2.6420056699798477e-06, + "loss": 2.2064, + "step": 554600 + }, + { + "epoch": 14.21, + "learning_rate": 2.6334665437032484e-06, + "loss": 2.4161, + "step": 554700 + }, + { + "epoch": 14.21, + "learning_rate": 2.624927417426649e-06, + "loss": 2.241, + "step": 554800 + }, + { + "epoch": 14.22, + "learning_rate": 2.61638829115005e-06, + "loss": 2.3325, + "step": 554900 + }, + { + "epoch": 14.22, + "learning_rate": 2.6078491648734505e-06, + "loss": 2.223, + "step": 555000 + }, + { + "epoch": 14.22, + "learning_rate": 2.599310038596851e-06, + "loss": 2.2619, + "step": 555100 + }, + { + "epoch": 14.22, + "learning_rate": 2.5907709123202515e-06, + "loss": 2.3514, + "step": 555200 + }, + { + "epoch": 14.23, + "learning_rate": 2.582231786043652e-06, + "loss": 2.2996, + "step": 555300 + }, + { + "epoch": 14.23, + "learning_rate": 2.573692659767053e-06, + "loss": 2.3588, + "step": 555400 + }, + { + "epoch": 14.23, + "learning_rate": 2.5651535334904536e-06, + "loss": 2.3086, + "step": 555500 + }, + { + "epoch": 14.23, + "learning_rate": 2.556614407213854e-06, + "loss": 2.1997, + "step": 555600 + }, + { + "epoch": 14.24, + "learning_rate": 2.5480752809372546e-06, + "loss": 2.1863, + "step": 555700 + }, + { + "epoch": 14.24, + "learning_rate": 2.5395361546606553e-06, + "loss": 2.171, + "step": 555800 + }, + { + "epoch": 14.24, + "learning_rate": 2.5309970283840555e-06, + "loss": 2.2636, + "step": 555900 + }, + { + "epoch": 14.24, + "learning_rate": 2.5224579021074563e-06, + "loss": 2.3338, + "step": 556000 + }, + { + "epoch": 14.25, + "learning_rate": 2.513918775830857e-06, + "loss": 2.3938, + "step": 556100 + }, + { + "epoch": 14.25, + "learning_rate": 2.5053796495542577e-06, + "loss": 2.4101, + "step": 556200 + }, + { + "epoch": 14.25, + "learning_rate": 2.4968405232776584e-06, + "loss": 2.2689, + "step": 556300 + }, + { + "epoch": 14.25, + "learning_rate": 2.488301397001059e-06, + "loss": 2.2431, + "step": 556400 + }, + { + "epoch": 14.26, + "learning_rate": 2.4797622707244598e-06, + "loss": 2.3691, + "step": 556500 + }, + { + "epoch": 14.26, + "learning_rate": 2.47122314444786e-06, + "loss": 2.3202, + "step": 556600 + }, + { + "epoch": 14.26, + "learning_rate": 2.4626840181712607e-06, + "loss": 2.094, + "step": 556700 + }, + { + "epoch": 14.26, + "learning_rate": 2.4541448918946614e-06, + "loss": 2.4063, + "step": 556800 + }, + { + "epoch": 14.27, + "learning_rate": 2.445605765618062e-06, + "loss": 2.2024, + "step": 556900 + }, + { + "epoch": 14.27, + "learning_rate": 2.437066639341463e-06, + "loss": 2.1287, + "step": 557000 + }, + { + "epoch": 14.27, + "learning_rate": 2.4285275130648635e-06, + "loss": 2.1329, + "step": 557100 + }, + { + "epoch": 14.27, + "learning_rate": 2.4199883867882642e-06, + "loss": 2.3669, + "step": 557200 + }, + { + "epoch": 14.28, + "learning_rate": 2.4114492605116645e-06, + "loss": 2.3657, + "step": 557300 + }, + { + "epoch": 14.28, + "learning_rate": 2.4029101342350652e-06, + "loss": 2.4, + "step": 557400 + }, + { + "epoch": 14.28, + "learning_rate": 2.394371007958466e-06, + "loss": 2.3162, + "step": 557500 + }, + { + "epoch": 14.28, + "learning_rate": 2.3858318816818666e-06, + "loss": 2.3129, + "step": 557600 + }, + { + "epoch": 14.29, + "learning_rate": 2.3772927554052673e-06, + "loss": 2.1451, + "step": 557700 + }, + { + "epoch": 14.29, + "learning_rate": 2.3687536291286676e-06, + "loss": 2.4304, + "step": 557800 + }, + { + "epoch": 14.29, + "learning_rate": 2.3602145028520683e-06, + "loss": 2.2704, + "step": 557900 + }, + { + "epoch": 14.29, + "learning_rate": 2.3516753765754686e-06, + "loss": 2.3049, + "step": 558000 + }, + { + "epoch": 14.3, + "learning_rate": 2.3431362502988693e-06, + "loss": 2.2524, + "step": 558100 + }, + { + "epoch": 14.3, + "learning_rate": 2.33459712402227e-06, + "loss": 2.3015, + "step": 558200 + }, + { + "epoch": 14.3, + "learning_rate": 2.3260579977456707e-06, + "loss": 2.278, + "step": 558300 + }, + { + "epoch": 14.3, + "learning_rate": 2.3175188714690714e-06, + "loss": 2.3302, + "step": 558400 + }, + { + "epoch": 14.31, + "learning_rate": 2.308979745192472e-06, + "loss": 2.2837, + "step": 558500 + }, + { + "epoch": 14.31, + "learning_rate": 2.3004406189158728e-06, + "loss": 2.3509, + "step": 558600 + }, + { + "epoch": 14.31, + "learning_rate": 2.291901492639273e-06, + "loss": 2.3553, + "step": 558700 + }, + { + "epoch": 14.31, + "learning_rate": 2.2833623663626738e-06, + "loss": 2.1597, + "step": 558800 + }, + { + "epoch": 14.32, + "learning_rate": 2.2748232400860745e-06, + "loss": 2.3153, + "step": 558900 + }, + { + "epoch": 14.32, + "learning_rate": 2.266284113809475e-06, + "loss": 2.2201, + "step": 559000 + }, + { + "epoch": 14.32, + "learning_rate": 2.257744987532876e-06, + "loss": 2.3287, + "step": 559100 + }, + { + "epoch": 14.33, + "learning_rate": 2.2492058612562766e-06, + "loss": 2.2204, + "step": 559200 + }, + { + "epoch": 14.33, + "learning_rate": 2.2406667349796773e-06, + "loss": 2.2555, + "step": 559300 + }, + { + "epoch": 14.33, + "learning_rate": 2.2321276087030775e-06, + "loss": 2.3604, + "step": 559400 + }, + { + "epoch": 14.33, + "learning_rate": 2.2235884824264783e-06, + "loss": 2.3152, + "step": 559500 + }, + { + "epoch": 14.34, + "learning_rate": 2.215049356149879e-06, + "loss": 2.2703, + "step": 559600 + }, + { + "epoch": 14.34, + "learning_rate": 2.2065102298732797e-06, + "loss": 2.4655, + "step": 559700 + }, + { + "epoch": 14.34, + "learning_rate": 2.1979711035966804e-06, + "loss": 2.1722, + "step": 559800 + }, + { + "epoch": 14.34, + "learning_rate": 2.189431977320081e-06, + "loss": 2.2994, + "step": 559900 + }, + { + "epoch": 14.35, + "learning_rate": 2.1808928510434813e-06, + "loss": 2.2292, + "step": 560000 + }, + { + "epoch": 14.35, + "learning_rate": 2.1723537247668816e-06, + "loss": 2.5873, + "step": 560100 + }, + { + "epoch": 14.35, + "learning_rate": 2.1638145984902823e-06, + "loss": 2.2973, + "step": 560200 + }, + { + "epoch": 14.35, + "learning_rate": 2.155275472213683e-06, + "loss": 2.4311, + "step": 560300 + }, + { + "epoch": 14.36, + "learning_rate": 2.1467363459370837e-06, + "loss": 2.1605, + "step": 560400 + }, + { + "epoch": 14.36, + "learning_rate": 2.1381972196604844e-06, + "loss": 2.2169, + "step": 560500 + }, + { + "epoch": 14.36, + "learning_rate": 2.129658093383885e-06, + "loss": 2.3904, + "step": 560600 + }, + { + "epoch": 14.36, + "learning_rate": 2.121118967107286e-06, + "loss": 2.2097, + "step": 560700 + }, + { + "epoch": 14.37, + "learning_rate": 2.112579840830686e-06, + "loss": 2.2065, + "step": 560800 + }, + { + "epoch": 14.37, + "learning_rate": 2.104040714554087e-06, + "loss": 2.2926, + "step": 560900 + }, + { + "epoch": 14.37, + "learning_rate": 2.0955015882774875e-06, + "loss": 2.3529, + "step": 561000 + }, + { + "epoch": 14.37, + "learning_rate": 2.086962462000888e-06, + "loss": 2.2113, + "step": 561100 + }, + { + "epoch": 14.38, + "learning_rate": 2.078423335724289e-06, + "loss": 2.1736, + "step": 561200 + }, + { + "epoch": 14.38, + "learning_rate": 2.0698842094476896e-06, + "loss": 2.1903, + "step": 561300 + }, + { + "epoch": 14.38, + "learning_rate": 2.0613450831710903e-06, + "loss": 2.3195, + "step": 561400 + }, + { + "epoch": 14.38, + "learning_rate": 2.0528059568944906e-06, + "loss": 2.3093, + "step": 561500 + }, + { + "epoch": 14.39, + "learning_rate": 2.0442668306178913e-06, + "loss": 2.3438, + "step": 561600 + }, + { + "epoch": 14.39, + "learning_rate": 2.035727704341292e-06, + "loss": 2.2253, + "step": 561700 + }, + { + "epoch": 14.39, + "learning_rate": 2.0271885780646927e-06, + "loss": 2.2596, + "step": 561800 + }, + { + "epoch": 14.39, + "learning_rate": 2.0186494517880934e-06, + "loss": 2.3994, + "step": 561900 + }, + { + "epoch": 14.4, + "learning_rate": 2.010110325511494e-06, + "loss": 2.2312, + "step": 562000 + }, + { + "epoch": 14.4, + "learning_rate": 2.0015711992348944e-06, + "loss": 2.4136, + "step": 562100 + }, + { + "epoch": 14.4, + "learning_rate": 1.9930320729582946e-06, + "loss": 2.305, + "step": 562200 + }, + { + "epoch": 14.4, + "learning_rate": 1.9844929466816953e-06, + "loss": 2.311, + "step": 562300 + }, + { + "epoch": 14.41, + "learning_rate": 1.975953820405096e-06, + "loss": 2.3172, + "step": 562400 + }, + { + "epoch": 14.41, + "learning_rate": 1.9674146941284967e-06, + "loss": 2.253, + "step": 562500 + }, + { + "epoch": 14.41, + "learning_rate": 1.9588755678518974e-06, + "loss": 2.1828, + "step": 562600 + }, + { + "epoch": 14.41, + "learning_rate": 1.950336441575298e-06, + "loss": 2.3561, + "step": 562700 + }, + { + "epoch": 14.42, + "learning_rate": 1.941797315298699e-06, + "loss": 2.3354, + "step": 562800 + }, + { + "epoch": 14.42, + "learning_rate": 1.933258189022099e-06, + "loss": 2.3418, + "step": 562900 + }, + { + "epoch": 14.42, + "learning_rate": 1.9247190627455e-06, + "loss": 2.1337, + "step": 563000 + }, + { + "epoch": 14.43, + "learning_rate": 1.9161799364689005e-06, + "loss": 2.2881, + "step": 563100 + }, + { + "epoch": 14.43, + "learning_rate": 1.9076408101923012e-06, + "loss": 2.1455, + "step": 563200 + }, + { + "epoch": 14.43, + "learning_rate": 1.899101683915702e-06, + "loss": 2.2943, + "step": 563300 + }, + { + "epoch": 14.43, + "learning_rate": 1.8905625576391024e-06, + "loss": 2.4468, + "step": 563400 + }, + { + "epoch": 14.44, + "learning_rate": 1.8820234313625031e-06, + "loss": 2.2302, + "step": 563500 + }, + { + "epoch": 14.44, + "learning_rate": 1.8734843050859038e-06, + "loss": 2.2683, + "step": 563600 + }, + { + "epoch": 14.44, + "learning_rate": 1.8649451788093043e-06, + "loss": 2.3384, + "step": 563700 + }, + { + "epoch": 14.44, + "learning_rate": 1.856406052532705e-06, + "loss": 2.4106, + "step": 563800 + }, + { + "epoch": 14.45, + "learning_rate": 1.8478669262561057e-06, + "loss": 2.3334, + "step": 563900 + }, + { + "epoch": 14.45, + "learning_rate": 1.8393277999795064e-06, + "loss": 2.3531, + "step": 564000 + }, + { + "epoch": 14.45, + "learning_rate": 1.830788673702907e-06, + "loss": 2.3331, + "step": 564100 + }, + { + "epoch": 14.45, + "learning_rate": 1.8222495474263076e-06, + "loss": 2.176, + "step": 564200 + }, + { + "epoch": 14.46, + "learning_rate": 1.8137104211497079e-06, + "loss": 2.3081, + "step": 564300 + }, + { + "epoch": 14.46, + "learning_rate": 1.8051712948731086e-06, + "loss": 2.3887, + "step": 564400 + }, + { + "epoch": 14.46, + "learning_rate": 1.796632168596509e-06, + "loss": 2.3987, + "step": 564500 + }, + { + "epoch": 14.46, + "learning_rate": 1.7880930423199098e-06, + "loss": 2.3948, + "step": 564600 + }, + { + "epoch": 14.47, + "learning_rate": 1.7795539160433105e-06, + "loss": 2.2204, + "step": 564700 + }, + { + "epoch": 14.47, + "learning_rate": 1.771014789766711e-06, + "loss": 2.2972, + "step": 564800 + }, + { + "epoch": 14.47, + "learning_rate": 1.7624756634901117e-06, + "loss": 2.3433, + "step": 564900 + }, + { + "epoch": 14.47, + "learning_rate": 1.7539365372135124e-06, + "loss": 2.2312, + "step": 565000 + }, + { + "epoch": 14.48, + "learning_rate": 1.745397410936913e-06, + "loss": 2.2213, + "step": 565100 + }, + { + "epoch": 14.48, + "learning_rate": 1.7368582846603136e-06, + "loss": 2.3164, + "step": 565200 + }, + { + "epoch": 14.48, + "learning_rate": 1.7283191583837143e-06, + "loss": 2.2808, + "step": 565300 + }, + { + "epoch": 14.48, + "learning_rate": 1.719780032107115e-06, + "loss": 2.0961, + "step": 565400 + }, + { + "epoch": 14.49, + "learning_rate": 1.7112409058305155e-06, + "loss": 2.3284, + "step": 565500 + }, + { + "epoch": 14.49, + "learning_rate": 1.7027017795539162e-06, + "loss": 2.2878, + "step": 565600 + }, + { + "epoch": 14.49, + "learning_rate": 1.6941626532773169e-06, + "loss": 2.22, + "step": 565700 + }, + { + "epoch": 14.49, + "learning_rate": 1.6856235270007173e-06, + "loss": 2.2838, + "step": 565800 + }, + { + "epoch": 14.5, + "learning_rate": 1.677084400724118e-06, + "loss": 2.1683, + "step": 565900 + }, + { + "epoch": 14.5, + "learning_rate": 1.6685452744475187e-06, + "loss": 2.1824, + "step": 566000 + }, + { + "epoch": 14.5, + "learning_rate": 1.6600061481709194e-06, + "loss": 2.2465, + "step": 566100 + }, + { + "epoch": 14.5, + "learning_rate": 1.65146702189432e-06, + "loss": 2.2038, + "step": 566200 + }, + { + "epoch": 14.51, + "learning_rate": 1.6429278956177206e-06, + "loss": 2.2572, + "step": 566300 + }, + { + "epoch": 14.51, + "learning_rate": 1.6343887693411213e-06, + "loss": 2.2173, + "step": 566400 + }, + { + "epoch": 14.51, + "learning_rate": 1.6258496430645216e-06, + "loss": 2.3154, + "step": 566500 + }, + { + "epoch": 14.51, + "learning_rate": 1.617310516787922e-06, + "loss": 2.3011, + "step": 566600 + }, + { + "epoch": 14.52, + "learning_rate": 1.6087713905113228e-06, + "loss": 2.2621, + "step": 566700 + }, + { + "epoch": 14.52, + "learning_rate": 1.6002322642347235e-06, + "loss": 2.3119, + "step": 566800 + }, + { + "epoch": 14.52, + "learning_rate": 1.591693137958124e-06, + "loss": 2.331, + "step": 566900 + }, + { + "epoch": 14.53, + "learning_rate": 1.5831540116815247e-06, + "loss": 2.4661, + "step": 567000 + }, + { + "epoch": 14.53, + "learning_rate": 1.5746148854049254e-06, + "loss": 2.3069, + "step": 567100 + }, + { + "epoch": 14.53, + "learning_rate": 1.566075759128326e-06, + "loss": 2.2827, + "step": 567200 + }, + { + "epoch": 14.53, + "learning_rate": 1.5575366328517266e-06, + "loss": 2.3809, + "step": 567300 + }, + { + "epoch": 14.54, + "learning_rate": 1.5489975065751273e-06, + "loss": 2.2665, + "step": 567400 + }, + { + "epoch": 14.54, + "learning_rate": 1.540458380298528e-06, + "loss": 2.2672, + "step": 567500 + }, + { + "epoch": 14.54, + "learning_rate": 1.5319192540219285e-06, + "loss": 2.3254, + "step": 567600 + }, + { + "epoch": 14.54, + "learning_rate": 1.5233801277453292e-06, + "loss": 2.1762, + "step": 567700 + }, + { + "epoch": 14.55, + "learning_rate": 1.5148410014687299e-06, + "loss": 2.1415, + "step": 567800 + }, + { + "epoch": 14.55, + "learning_rate": 1.5063018751921304e-06, + "loss": 2.2914, + "step": 567900 + }, + { + "epoch": 14.55, + "learning_rate": 1.497762748915531e-06, + "loss": 2.2524, + "step": 568000 + }, + { + "epoch": 14.55, + "learning_rate": 1.4892236226389318e-06, + "loss": 2.2628, + "step": 568100 + }, + { + "epoch": 14.56, + "learning_rate": 1.4806844963623323e-06, + "loss": 2.3373, + "step": 568200 + }, + { + "epoch": 14.56, + "learning_rate": 1.4721453700857328e-06, + "loss": 2.3604, + "step": 568300 + }, + { + "epoch": 14.56, + "learning_rate": 1.4636062438091335e-06, + "loss": 2.2078, + "step": 568400 + }, + { + "epoch": 14.56, + "learning_rate": 1.4550671175325342e-06, + "loss": 2.3737, + "step": 568500 + }, + { + "epoch": 14.57, + "learning_rate": 1.4465279912559346e-06, + "loss": 2.1506, + "step": 568600 + }, + { + "epoch": 14.57, + "learning_rate": 1.4379888649793353e-06, + "loss": 2.4117, + "step": 568700 + }, + { + "epoch": 14.57, + "learning_rate": 1.429449738702736e-06, + "loss": 2.2271, + "step": 568800 + }, + { + "epoch": 14.57, + "learning_rate": 1.4209106124261367e-06, + "loss": 2.3585, + "step": 568900 + }, + { + "epoch": 14.58, + "learning_rate": 1.4123714861495372e-06, + "loss": 2.4083, + "step": 569000 + }, + { + "epoch": 14.58, + "learning_rate": 1.403832359872938e-06, + "loss": 2.2435, + "step": 569100 + }, + { + "epoch": 14.58, + "learning_rate": 1.3952932335963386e-06, + "loss": 2.1267, + "step": 569200 + }, + { + "epoch": 14.58, + "learning_rate": 1.3867541073197391e-06, + "loss": 2.2659, + "step": 569300 + }, + { + "epoch": 14.59, + "learning_rate": 1.3782149810431396e-06, + "loss": 2.234, + "step": 569400 + }, + { + "epoch": 14.59, + "learning_rate": 1.3696758547665403e-06, + "loss": 2.3186, + "step": 569500 + }, + { + "epoch": 14.59, + "learning_rate": 1.361136728489941e-06, + "loss": 2.3054, + "step": 569600 + }, + { + "epoch": 14.59, + "learning_rate": 1.3525976022133415e-06, + "loss": 2.243, + "step": 569700 + }, + { + "epoch": 14.6, + "learning_rate": 1.3440584759367422e-06, + "loss": 2.1987, + "step": 569800 + }, + { + "epoch": 14.6, + "learning_rate": 1.335519349660143e-06, + "loss": 2.4739, + "step": 569900 + }, + { + "epoch": 14.6, + "learning_rate": 1.3269802233835434e-06, + "loss": 2.156, + "step": 570000 + }, + { + "epoch": 14.6, + "learning_rate": 1.318441097106944e-06, + "loss": 2.2558, + "step": 570100 + }, + { + "epoch": 14.61, + "learning_rate": 1.3099019708303448e-06, + "loss": 2.3416, + "step": 570200 + }, + { + "epoch": 14.61, + "learning_rate": 1.3013628445537455e-06, + "loss": 2.3596, + "step": 570300 + }, + { + "epoch": 14.61, + "learning_rate": 1.2928237182771458e-06, + "loss": 2.3077, + "step": 570400 + }, + { + "epoch": 14.61, + "learning_rate": 1.2842845920005465e-06, + "loss": 2.377, + "step": 570500 + }, + { + "epoch": 14.62, + "learning_rate": 1.2757454657239472e-06, + "loss": 2.2575, + "step": 570600 + }, + { + "epoch": 14.62, + "learning_rate": 1.2672063394473479e-06, + "loss": 2.3954, + "step": 570700 + }, + { + "epoch": 14.62, + "learning_rate": 1.2586672131707484e-06, + "loss": 2.1197, + "step": 570800 + }, + { + "epoch": 14.62, + "learning_rate": 1.250128086894149e-06, + "loss": 2.3559, + "step": 570900 + }, + { + "epoch": 14.63, + "learning_rate": 1.2415889606175498e-06, + "loss": 2.1791, + "step": 571000 + }, + { + "epoch": 14.63, + "learning_rate": 1.2330498343409503e-06, + "loss": 2.2896, + "step": 571100 + }, + { + "epoch": 14.63, + "learning_rate": 1.224510708064351e-06, + "loss": 2.3246, + "step": 571200 + }, + { + "epoch": 14.64, + "learning_rate": 1.2159715817877517e-06, + "loss": 2.3038, + "step": 571300 + }, + { + "epoch": 14.64, + "learning_rate": 1.2074324555111522e-06, + "loss": 2.4088, + "step": 571400 + }, + { + "epoch": 14.64, + "learning_rate": 1.1988933292345527e-06, + "loss": 2.2095, + "step": 571500 + }, + { + "epoch": 14.64, + "learning_rate": 1.1903542029579534e-06, + "loss": 2.407, + "step": 571600 + }, + { + "epoch": 14.65, + "learning_rate": 1.181815076681354e-06, + "loss": 2.1209, + "step": 571700 + }, + { + "epoch": 14.65, + "learning_rate": 1.1732759504047545e-06, + "loss": 2.1136, + "step": 571800 + }, + { + "epoch": 14.65, + "learning_rate": 1.1647368241281552e-06, + "loss": 2.3447, + "step": 571900 + }, + { + "epoch": 14.65, + "learning_rate": 1.156197697851556e-06, + "loss": 2.2854, + "step": 572000 + }, + { + "epoch": 14.66, + "learning_rate": 1.1476585715749564e-06, + "loss": 2.2554, + "step": 572100 + }, + { + "epoch": 14.66, + "learning_rate": 1.1391194452983571e-06, + "loss": 2.3975, + "step": 572200 + }, + { + "epoch": 14.66, + "learning_rate": 1.1305803190217578e-06, + "loss": 2.1929, + "step": 572300 + }, + { + "epoch": 14.66, + "learning_rate": 1.1220411927451585e-06, + "loss": 2.3007, + "step": 572400 + }, + { + "epoch": 14.67, + "learning_rate": 1.113502066468559e-06, + "loss": 2.433, + "step": 572500 + }, + { + "epoch": 14.67, + "learning_rate": 1.1049629401919595e-06, + "loss": 2.1499, + "step": 572600 + }, + { + "epoch": 14.67, + "learning_rate": 1.0964238139153602e-06, + "loss": 2.1606, + "step": 572700 + }, + { + "epoch": 14.67, + "learning_rate": 1.087884687638761e-06, + "loss": 2.2014, + "step": 572800 + }, + { + "epoch": 14.68, + "learning_rate": 1.0793455613621614e-06, + "loss": 2.229, + "step": 572900 + }, + { + "epoch": 14.68, + "learning_rate": 1.0708064350855621e-06, + "loss": 2.1355, + "step": 573000 + }, + { + "epoch": 14.68, + "learning_rate": 1.0622673088089628e-06, + "loss": 2.1702, + "step": 573100 + }, + { + "epoch": 14.68, + "learning_rate": 1.0537281825323633e-06, + "loss": 2.3781, + "step": 573200 + }, + { + "epoch": 14.69, + "learning_rate": 1.045189056255764e-06, + "loss": 2.2928, + "step": 573300 + }, + { + "epoch": 14.69, + "learning_rate": 1.0366499299791647e-06, + "loss": 2.2605, + "step": 573400 + }, + { + "epoch": 14.69, + "learning_rate": 1.0281108037025652e-06, + "loss": 2.3233, + "step": 573500 + }, + { + "epoch": 14.69, + "learning_rate": 1.0195716774259657e-06, + "loss": 2.2551, + "step": 573600 + }, + { + "epoch": 14.7, + "learning_rate": 1.0110325511493664e-06, + "loss": 2.4301, + "step": 573700 + }, + { + "epoch": 14.7, + "learning_rate": 1.002493424872767e-06, + "loss": 2.2655, + "step": 573800 + }, + { + "epoch": 14.7, + "learning_rate": 9.939542985961676e-07, + "loss": 2.3177, + "step": 573900 + }, + { + "epoch": 14.7, + "learning_rate": 9.854151723195683e-07, + "loss": 2.2581, + "step": 574000 + }, + { + "epoch": 14.71, + "learning_rate": 9.76876046042969e-07, + "loss": 2.2795, + "step": 574100 + }, + { + "epoch": 14.71, + "learning_rate": 9.683369197663695e-07, + "loss": 2.4648, + "step": 574200 + }, + { + "epoch": 14.71, + "learning_rate": 9.597977934897702e-07, + "loss": 2.2556, + "step": 574300 + }, + { + "epoch": 14.71, + "learning_rate": 9.512586672131709e-07, + "loss": 2.2184, + "step": 574400 + }, + { + "epoch": 14.72, + "learning_rate": 9.427195409365715e-07, + "loss": 2.2618, + "step": 574500 + }, + { + "epoch": 14.72, + "learning_rate": 9.341804146599721e-07, + "loss": 2.3441, + "step": 574600 + }, + { + "epoch": 14.72, + "learning_rate": 9.256412883833725e-07, + "loss": 2.1492, + "step": 574700 + }, + { + "epoch": 14.72, + "learning_rate": 9.171021621067732e-07, + "loss": 2.3674, + "step": 574800 + }, + { + "epoch": 14.73, + "learning_rate": 9.085630358301738e-07, + "loss": 2.1999, + "step": 574900 + }, + { + "epoch": 14.73, + "learning_rate": 9.000239095535744e-07, + "loss": 2.2784, + "step": 575000 + }, + { + "epoch": 14.73, + "learning_rate": 8.914847832769751e-07, + "loss": 2.2225, + "step": 575100 + }, + { + "epoch": 14.74, + "learning_rate": 8.829456570003757e-07, + "loss": 2.2886, + "step": 575200 + }, + { + "epoch": 14.74, + "learning_rate": 8.744065307237764e-07, + "loss": 2.3574, + "step": 575300 + }, + { + "epoch": 14.74, + "learning_rate": 8.65867404447177e-07, + "loss": 2.2308, + "step": 575400 + }, + { + "epoch": 14.74, + "learning_rate": 8.573282781705776e-07, + "loss": 2.2534, + "step": 575500 + }, + { + "epoch": 14.75, + "learning_rate": 8.487891518939783e-07, + "loss": 2.2353, + "step": 575600 + }, + { + "epoch": 14.75, + "learning_rate": 8.402500256173789e-07, + "loss": 2.2454, + "step": 575700 + }, + { + "epoch": 14.75, + "learning_rate": 8.317108993407794e-07, + "loss": 2.4992, + "step": 575800 + }, + { + "epoch": 14.75, + "learning_rate": 8.2317177306418e-07, + "loss": 2.4162, + "step": 575900 + }, + { + "epoch": 14.76, + "learning_rate": 8.146326467875807e-07, + "loss": 2.2603, + "step": 576000 + }, + { + "epoch": 14.76, + "learning_rate": 8.060935205109813e-07, + "loss": 2.4212, + "step": 576100 + }, + { + "epoch": 14.76, + "learning_rate": 7.97554394234382e-07, + "loss": 2.1244, + "step": 576200 + }, + { + "epoch": 14.76, + "learning_rate": 7.890152679577826e-07, + "loss": 2.3865, + "step": 576300 + }, + { + "epoch": 14.77, + "learning_rate": 7.804761416811832e-07, + "loss": 2.2334, + "step": 576400 + }, + { + "epoch": 14.77, + "learning_rate": 7.719370154045839e-07, + "loss": 2.3344, + "step": 576500 + }, + { + "epoch": 14.77, + "learning_rate": 7.633978891279845e-07, + "loss": 2.2259, + "step": 576600 + }, + { + "epoch": 14.77, + "learning_rate": 7.548587628513851e-07, + "loss": 2.4459, + "step": 576700 + }, + { + "epoch": 14.78, + "learning_rate": 7.463196365747857e-07, + "loss": 2.3647, + "step": 576800 + }, + { + "epoch": 14.78, + "learning_rate": 7.377805102981863e-07, + "loss": 2.4897, + "step": 576900 + }, + { + "epoch": 14.78, + "learning_rate": 7.29241384021587e-07, + "loss": 2.3553, + "step": 577000 + }, + { + "epoch": 14.78, + "learning_rate": 7.207022577449876e-07, + "loss": 2.2556, + "step": 577100 + }, + { + "epoch": 14.79, + "learning_rate": 7.121631314683882e-07, + "loss": 2.3734, + "step": 577200 + }, + { + "epoch": 14.79, + "learning_rate": 7.036240051917888e-07, + "loss": 2.414, + "step": 577300 + }, + { + "epoch": 14.79, + "learning_rate": 6.950848789151895e-07, + "loss": 2.2522, + "step": 577400 + }, + { + "epoch": 14.79, + "learning_rate": 6.865457526385901e-07, + "loss": 2.1318, + "step": 577500 + }, + { + "epoch": 14.8, + "learning_rate": 6.780066263619907e-07, + "loss": 2.1168, + "step": 577600 + }, + { + "epoch": 14.8, + "learning_rate": 6.694675000853914e-07, + "loss": 2.3635, + "step": 577700 + }, + { + "epoch": 14.8, + "learning_rate": 6.609283738087918e-07, + "loss": 2.3013, + "step": 577800 + }, + { + "epoch": 14.8, + "learning_rate": 6.523892475321925e-07, + "loss": 2.3134, + "step": 577900 + }, + { + "epoch": 14.81, + "learning_rate": 6.438501212555931e-07, + "loss": 2.3957, + "step": 578000 + }, + { + "epoch": 14.81, + "learning_rate": 6.353109949789938e-07, + "loss": 2.1935, + "step": 578100 + }, + { + "epoch": 14.81, + "learning_rate": 6.267718687023944e-07, + "loss": 2.2694, + "step": 578200 + }, + { + "epoch": 14.81, + "learning_rate": 6.18232742425795e-07, + "loss": 2.3684, + "step": 578300 + }, + { + "epoch": 14.82, + "learning_rate": 6.096936161491956e-07, + "loss": 2.2628, + "step": 578400 + }, + { + "epoch": 14.82, + "learning_rate": 6.011544898725962e-07, + "loss": 2.2747, + "step": 578500 + }, + { + "epoch": 14.82, + "learning_rate": 5.926153635959969e-07, + "loss": 2.2606, + "step": 578600 + }, + { + "epoch": 14.82, + "learning_rate": 5.840762373193975e-07, + "loss": 2.3139, + "step": 578700 + }, + { + "epoch": 14.83, + "learning_rate": 5.755371110427981e-07, + "loss": 2.329, + "step": 578800 + }, + { + "epoch": 14.83, + "learning_rate": 5.669979847661987e-07, + "loss": 2.2978, + "step": 578900 + }, + { + "epoch": 14.83, + "learning_rate": 5.584588584895993e-07, + "loss": 2.1899, + "step": 579000 + }, + { + "epoch": 14.84, + "learning_rate": 5.49919732213e-07, + "loss": 2.2918, + "step": 579100 + }, + { + "epoch": 14.84, + "learning_rate": 5.413806059364006e-07, + "loss": 2.1719, + "step": 579200 + }, + { + "epoch": 14.84, + "learning_rate": 5.328414796598013e-07, + "loss": 2.4, + "step": 579300 + }, + { + "epoch": 14.84, + "learning_rate": 5.243023533832018e-07, + "loss": 2.2283, + "step": 579400 + }, + { + "epoch": 14.85, + "learning_rate": 5.157632271066025e-07, + "loss": 2.4378, + "step": 579500 + }, + { + "epoch": 14.85, + "learning_rate": 5.072241008300031e-07, + "loss": 2.2668, + "step": 579600 + }, + { + "epoch": 14.85, + "learning_rate": 4.986849745534037e-07, + "loss": 2.1497, + "step": 579700 + }, + { + "epoch": 14.85, + "learning_rate": 4.901458482768044e-07, + "loss": 2.2747, + "step": 579800 + }, + { + "epoch": 14.86, + "learning_rate": 4.816067220002049e-07, + "loss": 2.2221, + "step": 579900 + }, + { + "epoch": 14.86, + "learning_rate": 4.730675957236056e-07, + "loss": 2.3611, + "step": 580000 + }, + { + "epoch": 14.86, + "learning_rate": 4.645284694470062e-07, + "loss": 2.2331, + "step": 580100 + }, + { + "epoch": 14.86, + "learning_rate": 4.559893431704068e-07, + "loss": 2.3034, + "step": 580200 + }, + { + "epoch": 14.87, + "learning_rate": 4.4745021689380747e-07, + "loss": 2.2968, + "step": 580300 + }, + { + "epoch": 14.87, + "learning_rate": 4.389110906172081e-07, + "loss": 2.3666, + "step": 580400 + }, + { + "epoch": 14.87, + "learning_rate": 4.3037196434060866e-07, + "loss": 2.3805, + "step": 580500 + }, + { + "epoch": 14.87, + "learning_rate": 4.218328380640093e-07, + "loss": 2.3279, + "step": 580600 + }, + { + "epoch": 14.88, + "learning_rate": 4.132937117874099e-07, + "loss": 2.1707, + "step": 580700 + }, + { + "epoch": 14.88, + "learning_rate": 4.0475458551081055e-07, + "loss": 2.0922, + "step": 580800 + }, + { + "epoch": 14.88, + "learning_rate": 3.962154592342112e-07, + "loss": 2.3051, + "step": 580900 + }, + { + "epoch": 14.88, + "learning_rate": 3.876763329576118e-07, + "loss": 2.1741, + "step": 581000 + }, + { + "epoch": 14.89, + "learning_rate": 3.7913720668101245e-07, + "loss": 2.3168, + "step": 581100 + }, + { + "epoch": 14.89, + "learning_rate": 3.7059808040441304e-07, + "loss": 2.2626, + "step": 581200 + }, + { + "epoch": 14.89, + "learning_rate": 3.620589541278137e-07, + "loss": 2.435, + "step": 581300 + }, + { + "epoch": 14.89, + "learning_rate": 3.535198278512143e-07, + "loss": 2.3655, + "step": 581400 + }, + { + "epoch": 14.9, + "learning_rate": 3.449807015746149e-07, + "loss": 2.2915, + "step": 581500 + }, + { + "epoch": 14.9, + "learning_rate": 3.3644157529801553e-07, + "loss": 2.2458, + "step": 581600 + }, + { + "epoch": 14.9, + "learning_rate": 3.279024490214161e-07, + "loss": 2.23, + "step": 581700 + }, + { + "epoch": 14.9, + "learning_rate": 3.1936332274481677e-07, + "loss": 2.3178, + "step": 581800 + }, + { + "epoch": 14.91, + "learning_rate": 3.108241964682174e-07, + "loss": 2.3426, + "step": 581900 + }, + { + "epoch": 14.91, + "learning_rate": 3.02285070191618e-07, + "loss": 2.1817, + "step": 582000 + }, + { + "epoch": 14.91, + "learning_rate": 2.937459439150186e-07, + "loss": 2.1318, + "step": 582100 + }, + { + "epoch": 14.91, + "learning_rate": 2.8520681763841926e-07, + "loss": 2.3357, + "step": 582200 + }, + { + "epoch": 14.92, + "learning_rate": 2.7666769136181985e-07, + "loss": 2.4255, + "step": 582300 + }, + { + "epoch": 14.92, + "learning_rate": 2.681285650852205e-07, + "loss": 2.3859, + "step": 582400 + }, + { + "epoch": 14.92, + "learning_rate": 2.595894388086211e-07, + "loss": 2.2031, + "step": 582500 + }, + { + "epoch": 14.92, + "learning_rate": 2.5105031253202175e-07, + "loss": 2.296, + "step": 582600 + }, + { + "epoch": 14.93, + "learning_rate": 2.4251118625542234e-07, + "loss": 2.1092, + "step": 582700 + }, + { + "epoch": 14.93, + "learning_rate": 2.3397205997882296e-07, + "loss": 2.2336, + "step": 582800 + }, + { + "epoch": 14.93, + "learning_rate": 2.2543293370222359e-07, + "loss": 2.3101, + "step": 582900 + }, + { + "epoch": 14.93, + "learning_rate": 2.1689380742562423e-07, + "loss": 2.2666, + "step": 583000 + }, + { + "epoch": 14.94, + "learning_rate": 2.0835468114902483e-07, + "loss": 2.31, + "step": 583100 + }, + { + "epoch": 14.94, + "learning_rate": 1.9981555487242548e-07, + "loss": 2.2143, + "step": 583200 + }, + { + "epoch": 14.94, + "learning_rate": 1.9127642859582607e-07, + "loss": 2.3068, + "step": 583300 + }, + { + "epoch": 14.95, + "learning_rate": 1.827373023192267e-07, + "loss": 2.2878, + "step": 583400 + }, + { + "epoch": 14.95, + "learning_rate": 1.7419817604262734e-07, + "loss": 2.131, + "step": 583500 + }, + { + "epoch": 14.95, + "learning_rate": 1.6565904976602794e-07, + "loss": 2.1412, + "step": 583600 + }, + { + "epoch": 14.95, + "learning_rate": 1.5711992348942856e-07, + "loss": 2.3785, + "step": 583700 + }, + { + "epoch": 14.96, + "learning_rate": 1.485807972128292e-07, + "loss": 2.3242, + "step": 583800 + }, + { + "epoch": 14.96, + "learning_rate": 1.4004167093622983e-07, + "loss": 2.2279, + "step": 583900 + }, + { + "epoch": 14.96, + "learning_rate": 1.3150254465963042e-07, + "loss": 2.3326, + "step": 584000 + }, + { + "epoch": 14.96, + "learning_rate": 1.2296341838303105e-07, + "loss": 2.2253, + "step": 584100 + }, + { + "epoch": 14.97, + "learning_rate": 1.1442429210643168e-07, + "loss": 2.3627, + "step": 584200 + }, + { + "epoch": 14.97, + "learning_rate": 1.058851658298323e-07, + "loss": 2.1505, + "step": 584300 + }, + { + "epoch": 14.97, + "learning_rate": 9.734603955323291e-08, + "loss": 2.4305, + "step": 584400 + }, + { + "epoch": 14.97, + "learning_rate": 8.880691327663355e-08, + "loss": 2.312, + "step": 584500 + }, + { + "epoch": 14.98, + "learning_rate": 8.026778700003416e-08, + "loss": 2.1861, + "step": 584600 + }, + { + "epoch": 14.98, + "learning_rate": 7.172866072343478e-08, + "loss": 2.3235, + "step": 584700 + }, + { + "epoch": 14.98, + "learning_rate": 6.31895344468354e-08, + "loss": 2.2686, + "step": 584800 + }, + { + "epoch": 14.98, + "learning_rate": 5.465040817023603e-08, + "loss": 2.2017, + "step": 584900 + }, + { + "epoch": 14.99, + "learning_rate": 4.611128189363664e-08, + "loss": 2.1246, + "step": 585000 + }, + { + "epoch": 14.99, + "learning_rate": 3.7572155617037264e-08, + "loss": 2.2377, + "step": 585100 + }, + { + "epoch": 14.99, + "learning_rate": 2.9033029340437886e-08, + "loss": 2.396, + "step": 585200 + }, + { + "epoch": 14.99, + "learning_rate": 2.0493903063838508e-08, + "loss": 2.3171, + "step": 585300 + }, + { + "epoch": 15.0, + "learning_rate": 1.195477678723913e-08, + "loss": 2.271, + "step": 585400 + }, + { + "epoch": 15.0, + "learning_rate": 3.4156505106397517e-09, + "loss": 2.2955, + "step": 585500 + }, + { + "epoch": 15.0, + "step": 585540, + "total_flos": 4.952930904057427e+16, + "train_runtime": 597.6283, + "train_samples_per_second": 979.773 + } + ], + "max_steps": 585540, + "num_train_epochs": 15, + "total_flos": 4.952930904057427e+16, + "trial_name": null, + "trial_params": null +}