{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "global_step": 585540, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.99914608737234e-05, "loss": 8.1433, "step": 100 }, { "epoch": 0.01, "learning_rate": 4.9982921747446805e-05, "loss": 5.3387, "step": 200 }, { "epoch": 0.01, "learning_rate": 4.99743826211702e-05, "loss": 4.6505, "step": 300 }, { "epoch": 0.01, "learning_rate": 4.99658434948936e-05, "loss": 4.5935, "step": 400 }, { "epoch": 0.01, "learning_rate": 4.9957304368617e-05, "loss": 4.1372, "step": 500 }, { "epoch": 0.02, "learning_rate": 4.9948765242340405e-05, "loss": 4.2621, "step": 600 }, { "epoch": 0.02, "learning_rate": 4.9940226116063806e-05, "loss": 4.2878, "step": 700 }, { "epoch": 0.02, "learning_rate": 4.993168698978721e-05, "loss": 4.3196, "step": 800 }, { "epoch": 0.02, "learning_rate": 4.99231478635106e-05, "loss": 4.0911, "step": 900 }, { "epoch": 0.03, "learning_rate": 4.991460873723401e-05, "loss": 4.2543, "step": 1000 }, { "epoch": 0.03, "learning_rate": 4.9906069610957406e-05, "loss": 4.2923, "step": 1100 }, { "epoch": 0.03, "learning_rate": 4.989753048468081e-05, "loss": 4.3948, "step": 1200 }, { "epoch": 0.03, "learning_rate": 4.988899135840421e-05, "loss": 4.1853, "step": 1300 }, { "epoch": 0.04, "learning_rate": 4.988045223212761e-05, "loss": 3.8924, "step": 1400 }, { "epoch": 0.04, "learning_rate": 4.987191310585101e-05, "loss": 4.4658, "step": 1500 }, { "epoch": 0.04, "learning_rate": 4.9863373979574414e-05, "loss": 3.9246, "step": 1600 }, { "epoch": 0.04, "learning_rate": 4.985483485329781e-05, "loss": 4.2261, "step": 1700 }, { "epoch": 0.05, "learning_rate": 4.984629572702122e-05, "loss": 4.5428, "step": 1800 }, { "epoch": 0.05, "learning_rate": 4.983775660074461e-05, "loss": 3.9668, "step": 1900 }, { "epoch": 0.05, "learning_rate": 4.982921747446801e-05, "loss": 4.412, "step": 2000 }, { "epoch": 0.05, "learning_rate": 4.9820678348191415e-05, "loss": 4.2224, "step": 2100 }, { "epoch": 0.06, "learning_rate": 4.9812139221914816e-05, "loss": 4.4448, "step": 2200 }, { "epoch": 0.06, "learning_rate": 4.980360009563821e-05, "loss": 4.1606, "step": 2300 }, { "epoch": 0.06, "learning_rate": 4.979506096936162e-05, "loss": 4.335, "step": 2400 }, { "epoch": 0.06, "learning_rate": 4.9786521843085014e-05, "loss": 3.945, "step": 2500 }, { "epoch": 0.07, "learning_rate": 4.977798271680842e-05, "loss": 4.0949, "step": 2600 }, { "epoch": 0.07, "learning_rate": 4.976944359053182e-05, "loss": 4.4423, "step": 2700 }, { "epoch": 0.07, "learning_rate": 4.976090446425522e-05, "loss": 3.9127, "step": 2800 }, { "epoch": 0.07, "learning_rate": 4.975236533797862e-05, "loss": 3.8504, "step": 2900 }, { "epoch": 0.08, "learning_rate": 4.974382621170202e-05, "loss": 4.1752, "step": 3000 }, { "epoch": 0.08, "learning_rate": 4.9735287085425417e-05, "loss": 4.2515, "step": 3100 }, { "epoch": 0.08, "learning_rate": 4.9726747959148825e-05, "loss": 4.1318, "step": 3200 }, { "epoch": 0.08, "learning_rate": 4.971820883287222e-05, "loss": 3.993, "step": 3300 }, { "epoch": 0.09, "learning_rate": 4.970966970659562e-05, "loss": 3.8842, "step": 3400 }, { "epoch": 0.09, "learning_rate": 4.970113058031902e-05, "loss": 4.1074, "step": 3500 }, { "epoch": 0.09, "learning_rate": 4.9692591454042424e-05, "loss": 4.203, "step": 3600 }, { "epoch": 0.09, "learning_rate": 4.9684052327765826e-05, "loss": 3.884, "step": 3700 }, { "epoch": 0.1, "learning_rate": 4.967551320148923e-05, "loss": 3.9207, "step": 3800 }, { "epoch": 0.1, "learning_rate": 4.966697407521262e-05, "loss": 3.9057, "step": 3900 }, { "epoch": 0.1, "learning_rate": 4.965843494893603e-05, "loss": 4.1704, "step": 4000 }, { "epoch": 0.11, "learning_rate": 4.9649895822659425e-05, "loss": 3.9624, "step": 4100 }, { "epoch": 0.11, "learning_rate": 4.964135669638283e-05, "loss": 4.2685, "step": 4200 }, { "epoch": 0.11, "learning_rate": 4.963281757010623e-05, "loss": 4.0752, "step": 4300 }, { "epoch": 0.11, "learning_rate": 4.962427844382963e-05, "loss": 4.2952, "step": 4400 }, { "epoch": 0.12, "learning_rate": 4.961573931755303e-05, "loss": 3.9692, "step": 4500 }, { "epoch": 0.12, "learning_rate": 4.960720019127643e-05, "loss": 4.1518, "step": 4600 }, { "epoch": 0.12, "learning_rate": 4.959866106499983e-05, "loss": 4.0681, "step": 4700 }, { "epoch": 0.12, "learning_rate": 4.9590121938723236e-05, "loss": 4.0123, "step": 4800 }, { "epoch": 0.13, "learning_rate": 4.958158281244663e-05, "loss": 4.1172, "step": 4900 }, { "epoch": 0.13, "learning_rate": 4.957304368617003e-05, "loss": 3.9204, "step": 5000 }, { "epoch": 0.13, "learning_rate": 4.9564504559893434e-05, "loss": 4.104, "step": 5100 }, { "epoch": 0.13, "learning_rate": 4.9555965433616835e-05, "loss": 4.1304, "step": 5200 }, { "epoch": 0.14, "learning_rate": 4.954742630734023e-05, "loss": 4.1386, "step": 5300 }, { "epoch": 0.14, "learning_rate": 4.953888718106364e-05, "loss": 4.0127, "step": 5400 }, { "epoch": 0.14, "learning_rate": 4.953034805478703e-05, "loss": 3.9139, "step": 5500 }, { "epoch": 0.14, "learning_rate": 4.9521808928510435e-05, "loss": 3.9059, "step": 5600 }, { "epoch": 0.15, "learning_rate": 4.9513269802233836e-05, "loss": 3.8485, "step": 5700 }, { "epoch": 0.15, "learning_rate": 4.950473067595724e-05, "loss": 4.0259, "step": 5800 }, { "epoch": 0.15, "learning_rate": 4.949619154968064e-05, "loss": 3.9548, "step": 5900 }, { "epoch": 0.15, "learning_rate": 4.948765242340404e-05, "loss": 4.1649, "step": 6000 }, { "epoch": 0.16, "learning_rate": 4.9479113297127436e-05, "loss": 3.9737, "step": 6100 }, { "epoch": 0.16, "learning_rate": 4.9470574170850844e-05, "loss": 4.2423, "step": 6200 }, { "epoch": 0.16, "learning_rate": 4.946203504457424e-05, "loss": 4.0339, "step": 6300 }, { "epoch": 0.16, "learning_rate": 4.945349591829764e-05, "loss": 4.0146, "step": 6400 }, { "epoch": 0.17, "learning_rate": 4.944495679202104e-05, "loss": 4.097, "step": 6500 }, { "epoch": 0.17, "learning_rate": 4.9436417665744444e-05, "loss": 3.8055, "step": 6600 }, { "epoch": 0.17, "learning_rate": 4.9427878539467845e-05, "loss": 4.1866, "step": 6700 }, { "epoch": 0.17, "learning_rate": 4.941933941319125e-05, "loss": 4.2445, "step": 6800 }, { "epoch": 0.18, "learning_rate": 4.941080028691464e-05, "loss": 3.9666, "step": 6900 }, { "epoch": 0.18, "learning_rate": 4.940226116063805e-05, "loss": 3.9317, "step": 7000 }, { "epoch": 0.18, "learning_rate": 4.9393722034361445e-05, "loss": 3.6663, "step": 7100 }, { "epoch": 0.18, "learning_rate": 4.9385182908084846e-05, "loss": 3.7571, "step": 7200 }, { "epoch": 0.19, "learning_rate": 4.937664378180825e-05, "loss": 3.772, "step": 7300 }, { "epoch": 0.19, "learning_rate": 4.936810465553165e-05, "loss": 3.5315, "step": 7400 }, { "epoch": 0.19, "learning_rate": 4.935956552925505e-05, "loss": 3.9508, "step": 7500 }, { "epoch": 0.19, "learning_rate": 4.935102640297845e-05, "loss": 4.0053, "step": 7600 }, { "epoch": 0.2, "learning_rate": 4.934248727670185e-05, "loss": 3.8292, "step": 7700 }, { "epoch": 0.2, "learning_rate": 4.9333948150425255e-05, "loss": 3.9688, "step": 7800 }, { "epoch": 0.2, "learning_rate": 4.932540902414865e-05, "loss": 3.8298, "step": 7900 }, { "epoch": 0.2, "learning_rate": 4.931686989787205e-05, "loss": 3.8508, "step": 8000 }, { "epoch": 0.21, "learning_rate": 4.930833077159545e-05, "loss": 3.6922, "step": 8100 }, { "epoch": 0.21, "learning_rate": 4.9299791645318855e-05, "loss": 3.8841, "step": 8200 }, { "epoch": 0.21, "learning_rate": 4.929125251904225e-05, "loss": 3.7595, "step": 8300 }, { "epoch": 0.22, "learning_rate": 4.928271339276566e-05, "loss": 3.6968, "step": 8400 }, { "epoch": 0.22, "learning_rate": 4.927417426648905e-05, "loss": 3.7239, "step": 8500 }, { "epoch": 0.22, "learning_rate": 4.9265635140212454e-05, "loss": 3.7007, "step": 8600 }, { "epoch": 0.22, "learning_rate": 4.9257096013935856e-05, "loss": 3.762, "step": 8700 }, { "epoch": 0.23, "learning_rate": 4.924855688765926e-05, "loss": 3.912, "step": 8800 }, { "epoch": 0.23, "learning_rate": 4.924001776138266e-05, "loss": 3.8828, "step": 8900 }, { "epoch": 0.23, "learning_rate": 4.923147863510606e-05, "loss": 3.7351, "step": 9000 }, { "epoch": 0.23, "learning_rate": 4.9222939508829455e-05, "loss": 3.7754, "step": 9100 }, { "epoch": 0.24, "learning_rate": 4.9214400382552864e-05, "loss": 4.0024, "step": 9200 }, { "epoch": 0.24, "learning_rate": 4.920586125627626e-05, "loss": 3.946, "step": 9300 }, { "epoch": 0.24, "learning_rate": 4.919732212999966e-05, "loss": 3.7708, "step": 9400 }, { "epoch": 0.24, "learning_rate": 4.918878300372306e-05, "loss": 3.7317, "step": 9500 }, { "epoch": 0.25, "learning_rate": 4.918024387744646e-05, "loss": 3.9509, "step": 9600 }, { "epoch": 0.25, "learning_rate": 4.9171704751169864e-05, "loss": 3.5076, "step": 9700 }, { "epoch": 0.25, "learning_rate": 4.9163165624893266e-05, "loss": 3.9101, "step": 9800 }, { "epoch": 0.25, "learning_rate": 4.915462649861666e-05, "loss": 3.9417, "step": 9900 }, { "epoch": 0.26, "learning_rate": 4.914608737234007e-05, "loss": 3.9119, "step": 10000 }, { "epoch": 0.26, "learning_rate": 4.9137548246063464e-05, "loss": 3.9257, "step": 10100 }, { "epoch": 0.26, "learning_rate": 4.9129009119786865e-05, "loss": 4.0386, "step": 10200 }, { "epoch": 0.26, "learning_rate": 4.912046999351027e-05, "loss": 3.942, "step": 10300 }, { "epoch": 0.27, "learning_rate": 4.911193086723367e-05, "loss": 3.7424, "step": 10400 }, { "epoch": 0.27, "learning_rate": 4.910339174095706e-05, "loss": 3.5835, "step": 10500 }, { "epoch": 0.27, "learning_rate": 4.909485261468047e-05, "loss": 3.9946, "step": 10600 }, { "epoch": 0.27, "learning_rate": 4.9086313488403866e-05, "loss": 3.9583, "step": 10700 }, { "epoch": 0.28, "learning_rate": 4.907777436212727e-05, "loss": 3.6759, "step": 10800 }, { "epoch": 0.28, "learning_rate": 4.906923523585067e-05, "loss": 3.8039, "step": 10900 }, { "epoch": 0.28, "learning_rate": 4.906069610957407e-05, "loss": 3.8028, "step": 11000 }, { "epoch": 0.28, "learning_rate": 4.905215698329747e-05, "loss": 3.6921, "step": 11100 }, { "epoch": 0.29, "learning_rate": 4.9043617857020874e-05, "loss": 3.5227, "step": 11200 }, { "epoch": 0.29, "learning_rate": 4.903507873074427e-05, "loss": 3.6822, "step": 11300 }, { "epoch": 0.29, "learning_rate": 4.902653960446768e-05, "loss": 3.5773, "step": 11400 }, { "epoch": 0.29, "learning_rate": 4.901800047819107e-05, "loss": 3.7838, "step": 11500 }, { "epoch": 0.3, "learning_rate": 4.9009461351914474e-05, "loss": 3.7591, "step": 11600 }, { "epoch": 0.3, "learning_rate": 4.9000922225637875e-05, "loss": 3.8082, "step": 11700 }, { "epoch": 0.3, "learning_rate": 4.899238309936128e-05, "loss": 3.7714, "step": 11800 }, { "epoch": 0.3, "learning_rate": 4.898384397308468e-05, "loss": 3.7589, "step": 11900 }, { "epoch": 0.31, "learning_rate": 4.897530484680808e-05, "loss": 3.8622, "step": 12000 }, { "epoch": 0.31, "learning_rate": 4.8966765720531475e-05, "loss": 3.7458, "step": 12100 }, { "epoch": 0.31, "learning_rate": 4.895822659425488e-05, "loss": 3.5666, "step": 12200 }, { "epoch": 0.32, "learning_rate": 4.894968746797828e-05, "loss": 3.773, "step": 12300 }, { "epoch": 0.32, "learning_rate": 4.894114834170168e-05, "loss": 3.792, "step": 12400 }, { "epoch": 0.32, "learning_rate": 4.893260921542508e-05, "loss": 3.8636, "step": 12500 }, { "epoch": 0.32, "learning_rate": 4.892407008914848e-05, "loss": 3.9016, "step": 12600 }, { "epoch": 0.33, "learning_rate": 4.8915530962871884e-05, "loss": 3.5966, "step": 12700 }, { "epoch": 0.33, "learning_rate": 4.8906991836595285e-05, "loss": 3.7381, "step": 12800 }, { "epoch": 0.33, "learning_rate": 4.889845271031868e-05, "loss": 3.8202, "step": 12900 }, { "epoch": 0.33, "learning_rate": 4.888991358404209e-05, "loss": 3.7938, "step": 13000 }, { "epoch": 0.34, "learning_rate": 4.888137445776548e-05, "loss": 3.668, "step": 13100 }, { "epoch": 0.34, "learning_rate": 4.8872835331488885e-05, "loss": 3.598, "step": 13200 }, { "epoch": 0.34, "learning_rate": 4.8864296205212286e-05, "loss": 3.8545, "step": 13300 }, { "epoch": 0.34, "learning_rate": 4.885575707893569e-05, "loss": 3.7405, "step": 13400 }, { "epoch": 0.35, "learning_rate": 4.884721795265908e-05, "loss": 3.7744, "step": 13500 }, { "epoch": 0.35, "learning_rate": 4.883867882638249e-05, "loss": 3.7296, "step": 13600 }, { "epoch": 0.35, "learning_rate": 4.8830139700105886e-05, "loss": 3.7762, "step": 13700 }, { "epoch": 0.35, "learning_rate": 4.882160057382929e-05, "loss": 3.6065, "step": 13800 }, { "epoch": 0.36, "learning_rate": 4.881306144755269e-05, "loss": 3.8279, "step": 13900 }, { "epoch": 0.36, "learning_rate": 4.880452232127609e-05, "loss": 3.7685, "step": 14000 }, { "epoch": 0.36, "learning_rate": 4.879598319499949e-05, "loss": 3.7034, "step": 14100 }, { "epoch": 0.36, "learning_rate": 4.8787444068722893e-05, "loss": 3.963, "step": 14200 }, { "epoch": 0.37, "learning_rate": 4.877890494244629e-05, "loss": 3.6013, "step": 14300 }, { "epoch": 0.37, "learning_rate": 4.8770365816169697e-05, "loss": 3.6733, "step": 14400 }, { "epoch": 0.37, "learning_rate": 4.876182668989309e-05, "loss": 3.4549, "step": 14500 }, { "epoch": 0.37, "learning_rate": 4.875328756361649e-05, "loss": 3.5551, "step": 14600 }, { "epoch": 0.38, "learning_rate": 4.8744748437339894e-05, "loss": 3.6137, "step": 14700 }, { "epoch": 0.38, "learning_rate": 4.8736209311063296e-05, "loss": 3.9688, "step": 14800 }, { "epoch": 0.38, "learning_rate": 4.87276701847867e-05, "loss": 3.7047, "step": 14900 }, { "epoch": 0.38, "learning_rate": 4.87191310585101e-05, "loss": 3.5083, "step": 15000 }, { "epoch": 0.39, "learning_rate": 4.8710591932233494e-05, "loss": 3.7918, "step": 15100 }, { "epoch": 0.39, "learning_rate": 4.87020528059569e-05, "loss": 3.7162, "step": 15200 }, { "epoch": 0.39, "learning_rate": 4.86935136796803e-05, "loss": 3.6648, "step": 15300 }, { "epoch": 0.39, "learning_rate": 4.86849745534037e-05, "loss": 3.5669, "step": 15400 }, { "epoch": 0.4, "learning_rate": 4.86764354271271e-05, "loss": 3.659, "step": 15500 }, { "epoch": 0.4, "learning_rate": 4.86678963008505e-05, "loss": 3.9466, "step": 15600 }, { "epoch": 0.4, "learning_rate": 4.8659357174573896e-05, "loss": 3.4854, "step": 15700 }, { "epoch": 0.4, "learning_rate": 4.8650818048297305e-05, "loss": 3.7612, "step": 15800 }, { "epoch": 0.41, "learning_rate": 4.86422789220207e-05, "loss": 3.7588, "step": 15900 }, { "epoch": 0.41, "learning_rate": 4.86337397957441e-05, "loss": 3.4317, "step": 16000 }, { "epoch": 0.41, "learning_rate": 4.86252006694675e-05, "loss": 3.7622, "step": 16100 }, { "epoch": 0.42, "learning_rate": 4.8616661543190904e-05, "loss": 3.5345, "step": 16200 }, { "epoch": 0.42, "learning_rate": 4.8608122416914306e-05, "loss": 3.7285, "step": 16300 }, { "epoch": 0.42, "learning_rate": 4.859958329063771e-05, "loss": 3.5887, "step": 16400 }, { "epoch": 0.42, "learning_rate": 4.85910441643611e-05, "loss": 3.4669, "step": 16500 }, { "epoch": 0.43, "learning_rate": 4.858250503808451e-05, "loss": 3.535, "step": 16600 }, { "epoch": 0.43, "learning_rate": 4.8573965911807905e-05, "loss": 3.6845, "step": 16700 }, { "epoch": 0.43, "learning_rate": 4.856542678553131e-05, "loss": 3.6075, "step": 16800 }, { "epoch": 0.43, "learning_rate": 4.855688765925471e-05, "loss": 3.7537, "step": 16900 }, { "epoch": 0.44, "learning_rate": 4.854834853297811e-05, "loss": 3.6431, "step": 17000 }, { "epoch": 0.44, "learning_rate": 4.853980940670151e-05, "loss": 3.4635, "step": 17100 }, { "epoch": 0.44, "learning_rate": 4.853127028042491e-05, "loss": 3.5606, "step": 17200 }, { "epoch": 0.44, "learning_rate": 4.852273115414831e-05, "loss": 3.9068, "step": 17300 }, { "epoch": 0.45, "learning_rate": 4.8514192027871716e-05, "loss": 3.6972, "step": 17400 }, { "epoch": 0.45, "learning_rate": 4.850565290159511e-05, "loss": 3.6389, "step": 17500 }, { "epoch": 0.45, "learning_rate": 4.849711377531851e-05, "loss": 3.6478, "step": 17600 }, { "epoch": 0.45, "learning_rate": 4.8488574649041914e-05, "loss": 3.8012, "step": 17700 }, { "epoch": 0.46, "learning_rate": 4.848003552276531e-05, "loss": 3.5439, "step": 17800 }, { "epoch": 0.46, "learning_rate": 4.847149639648872e-05, "loss": 3.44, "step": 17900 }, { "epoch": 0.46, "learning_rate": 4.846295727021211e-05, "loss": 3.6266, "step": 18000 }, { "epoch": 0.46, "learning_rate": 4.845441814393551e-05, "loss": 3.2828, "step": 18100 }, { "epoch": 0.47, "learning_rate": 4.8445879017658915e-05, "loss": 3.2803, "step": 18200 }, { "epoch": 0.47, "learning_rate": 4.8437339891382316e-05, "loss": 3.7721, "step": 18300 }, { "epoch": 0.47, "learning_rate": 4.842880076510571e-05, "loss": 3.7414, "step": 18400 }, { "epoch": 0.47, "learning_rate": 4.842026163882912e-05, "loss": 3.6759, "step": 18500 }, { "epoch": 0.48, "learning_rate": 4.8411722512552514e-05, "loss": 3.6668, "step": 18600 }, { "epoch": 0.48, "learning_rate": 4.8403183386275916e-05, "loss": 3.6371, "step": 18700 }, { "epoch": 0.48, "learning_rate": 4.839464425999932e-05, "loss": 3.3712, "step": 18800 }, { "epoch": 0.48, "learning_rate": 4.838610513372272e-05, "loss": 3.5995, "step": 18900 }, { "epoch": 0.49, "learning_rate": 4.837756600744612e-05, "loss": 3.6567, "step": 19000 }, { "epoch": 0.49, "learning_rate": 4.836902688116952e-05, "loss": 3.626, "step": 19100 }, { "epoch": 0.49, "learning_rate": 4.836048775489292e-05, "loss": 3.7768, "step": 19200 }, { "epoch": 0.49, "learning_rate": 4.8351948628616325e-05, "loss": 3.2899, "step": 19300 }, { "epoch": 0.5, "learning_rate": 4.834340950233972e-05, "loss": 3.6196, "step": 19400 }, { "epoch": 0.5, "learning_rate": 4.833487037606312e-05, "loss": 3.4995, "step": 19500 }, { "epoch": 0.5, "learning_rate": 4.832633124978652e-05, "loss": 3.5778, "step": 19600 }, { "epoch": 0.5, "learning_rate": 4.8317792123509924e-05, "loss": 3.6503, "step": 19700 }, { "epoch": 0.51, "learning_rate": 4.8309252997233326e-05, "loss": 3.5208, "step": 19800 }, { "epoch": 0.51, "learning_rate": 4.830071387095673e-05, "loss": 3.5284, "step": 19900 }, { "epoch": 0.51, "learning_rate": 4.829217474468012e-05, "loss": 3.4265, "step": 20000 }, { "epoch": 0.51, "learning_rate": 4.828363561840353e-05, "loss": 3.597, "step": 20100 }, { "epoch": 0.52, "learning_rate": 4.8275096492126925e-05, "loss": 3.7741, "step": 20200 }, { "epoch": 0.52, "learning_rate": 4.826655736585033e-05, "loss": 3.8414, "step": 20300 }, { "epoch": 0.52, "learning_rate": 4.825801823957373e-05, "loss": 3.5326, "step": 20400 }, { "epoch": 0.53, "learning_rate": 4.824947911329713e-05, "loss": 3.7442, "step": 20500 }, { "epoch": 0.53, "learning_rate": 4.8240939987020525e-05, "loss": 3.5901, "step": 20600 }, { "epoch": 0.53, "learning_rate": 4.823240086074393e-05, "loss": 3.8139, "step": 20700 }, { "epoch": 0.53, "learning_rate": 4.822386173446733e-05, "loss": 3.5049, "step": 20800 }, { "epoch": 0.54, "learning_rate": 4.821532260819073e-05, "loss": 3.4308, "step": 20900 }, { "epoch": 0.54, "learning_rate": 4.820678348191413e-05, "loss": 3.5181, "step": 21000 }, { "epoch": 0.54, "learning_rate": 4.819824435563753e-05, "loss": 3.6863, "step": 21100 }, { "epoch": 0.54, "learning_rate": 4.8189705229360934e-05, "loss": 3.4282, "step": 21200 }, { "epoch": 0.55, "learning_rate": 4.8181166103084336e-05, "loss": 3.4732, "step": 21300 }, { "epoch": 0.55, "learning_rate": 4.817262697680773e-05, "loss": 3.6055, "step": 21400 }, { "epoch": 0.55, "learning_rate": 4.816408785053114e-05, "loss": 3.4187, "step": 21500 }, { "epoch": 0.55, "learning_rate": 4.8155548724254534e-05, "loss": 3.4109, "step": 21600 }, { "epoch": 0.56, "learning_rate": 4.8147009597977935e-05, "loss": 3.5652, "step": 21700 }, { "epoch": 0.56, "learning_rate": 4.813847047170134e-05, "loss": 3.5527, "step": 21800 }, { "epoch": 0.56, "learning_rate": 4.812993134542474e-05, "loss": 3.5741, "step": 21900 }, { "epoch": 0.56, "learning_rate": 4.812139221914814e-05, "loss": 3.6138, "step": 22000 }, { "epoch": 0.57, "learning_rate": 4.811285309287154e-05, "loss": 3.5562, "step": 22100 }, { "epoch": 0.57, "learning_rate": 4.8104313966594936e-05, "loss": 3.5844, "step": 22200 }, { "epoch": 0.57, "learning_rate": 4.8095774840318344e-05, "loss": 3.4481, "step": 22300 }, { "epoch": 0.57, "learning_rate": 4.808723571404174e-05, "loss": 3.547, "step": 22400 }, { "epoch": 0.58, "learning_rate": 4.807869658776514e-05, "loss": 3.5046, "step": 22500 }, { "epoch": 0.58, "learning_rate": 4.807015746148854e-05, "loss": 3.5041, "step": 22600 }, { "epoch": 0.58, "learning_rate": 4.8061618335211944e-05, "loss": 3.5439, "step": 22700 }, { "epoch": 0.58, "learning_rate": 4.8053079208935345e-05, "loss": 3.5554, "step": 22800 }, { "epoch": 0.59, "learning_rate": 4.804454008265875e-05, "loss": 3.517, "step": 22900 }, { "epoch": 0.59, "learning_rate": 4.803600095638214e-05, "loss": 3.5763, "step": 23000 }, { "epoch": 0.59, "learning_rate": 4.802746183010555e-05, "loss": 3.5727, "step": 23100 }, { "epoch": 0.59, "learning_rate": 4.8018922703828945e-05, "loss": 3.6164, "step": 23200 }, { "epoch": 0.6, "learning_rate": 4.8010383577552346e-05, "loss": 3.5565, "step": 23300 }, { "epoch": 0.6, "learning_rate": 4.800184445127575e-05, "loss": 3.2922, "step": 23400 }, { "epoch": 0.6, "learning_rate": 4.799330532499915e-05, "loss": 3.4609, "step": 23500 }, { "epoch": 0.6, "learning_rate": 4.7984766198722544e-05, "loss": 3.6773, "step": 23600 }, { "epoch": 0.61, "learning_rate": 4.797622707244595e-05, "loss": 3.4789, "step": 23700 }, { "epoch": 0.61, "learning_rate": 4.796768794616935e-05, "loss": 3.9325, "step": 23800 }, { "epoch": 0.61, "learning_rate": 4.795914881989275e-05, "loss": 3.3577, "step": 23900 }, { "epoch": 0.61, "learning_rate": 4.795060969361615e-05, "loss": 3.4149, "step": 24000 }, { "epoch": 0.62, "learning_rate": 4.794207056733955e-05, "loss": 3.6172, "step": 24100 }, { "epoch": 0.62, "learning_rate": 4.7933531441062953e-05, "loss": 3.4819, "step": 24200 }, { "epoch": 0.62, "learning_rate": 4.7924992314786355e-05, "loss": 3.4195, "step": 24300 }, { "epoch": 0.63, "learning_rate": 4.791645318850975e-05, "loss": 3.3149, "step": 24400 }, { "epoch": 0.63, "learning_rate": 4.790791406223316e-05, "loss": 3.3554, "step": 24500 }, { "epoch": 0.63, "learning_rate": 4.789937493595655e-05, "loss": 3.5358, "step": 24600 }, { "epoch": 0.63, "learning_rate": 4.7890835809679954e-05, "loss": 3.4567, "step": 24700 }, { "epoch": 0.64, "learning_rate": 4.7882296683403356e-05, "loss": 3.5939, "step": 24800 }, { "epoch": 0.64, "learning_rate": 4.787375755712676e-05, "loss": 3.3348, "step": 24900 }, { "epoch": 0.64, "learning_rate": 4.786521843085016e-05, "loss": 3.5004, "step": 25000 }, { "epoch": 0.64, "learning_rate": 4.785667930457356e-05, "loss": 3.5305, "step": 25100 }, { "epoch": 0.65, "learning_rate": 4.7848140178296955e-05, "loss": 3.3193, "step": 25200 }, { "epoch": 0.65, "learning_rate": 4.7839601052020364e-05, "loss": 3.4184, "step": 25300 }, { "epoch": 0.65, "learning_rate": 4.783106192574376e-05, "loss": 3.4644, "step": 25400 }, { "epoch": 0.65, "learning_rate": 4.782252279946716e-05, "loss": 3.5732, "step": 25500 }, { "epoch": 0.66, "learning_rate": 4.781398367319056e-05, "loss": 3.2855, "step": 25600 }, { "epoch": 0.66, "learning_rate": 4.780544454691396e-05, "loss": 3.464, "step": 25700 }, { "epoch": 0.66, "learning_rate": 4.779690542063736e-05, "loss": 3.5029, "step": 25800 }, { "epoch": 0.66, "learning_rate": 4.7788366294360766e-05, "loss": 3.4364, "step": 25900 }, { "epoch": 0.67, "learning_rate": 4.777982716808416e-05, "loss": 3.3555, "step": 26000 }, { "epoch": 0.67, "learning_rate": 4.777128804180756e-05, "loss": 3.6529, "step": 26100 }, { "epoch": 0.67, "learning_rate": 4.7762748915530964e-05, "loss": 3.537, "step": 26200 }, { "epoch": 0.67, "learning_rate": 4.7754209789254366e-05, "loss": 3.4883, "step": 26300 }, { "epoch": 0.68, "learning_rate": 4.774567066297777e-05, "loss": 3.6947, "step": 26400 }, { "epoch": 0.68, "learning_rate": 4.773713153670117e-05, "loss": 3.457, "step": 26500 }, { "epoch": 0.68, "learning_rate": 4.7728592410424563e-05, "loss": 3.4468, "step": 26600 }, { "epoch": 0.68, "learning_rate": 4.772005328414797e-05, "loss": 3.5284, "step": 26700 }, { "epoch": 0.69, "learning_rate": 4.7711514157871367e-05, "loss": 3.221, "step": 26800 }, { "epoch": 0.69, "learning_rate": 4.770297503159477e-05, "loss": 3.4848, "step": 26900 }, { "epoch": 0.69, "learning_rate": 4.769443590531817e-05, "loss": 3.4874, "step": 27000 }, { "epoch": 0.69, "learning_rate": 4.768589677904157e-05, "loss": 3.435, "step": 27100 }, { "epoch": 0.7, "learning_rate": 4.767735765276497e-05, "loss": 3.5241, "step": 27200 }, { "epoch": 0.7, "learning_rate": 4.7668818526488374e-05, "loss": 3.6542, "step": 27300 }, { "epoch": 0.7, "learning_rate": 4.766027940021177e-05, "loss": 3.3834, "step": 27400 }, { "epoch": 0.7, "learning_rate": 4.765174027393518e-05, "loss": 3.6834, "step": 27500 }, { "epoch": 0.71, "learning_rate": 4.764320114765857e-05, "loss": 3.2529, "step": 27600 }, { "epoch": 0.71, "learning_rate": 4.7634662021381974e-05, "loss": 3.3623, "step": 27700 }, { "epoch": 0.71, "learning_rate": 4.7626122895105375e-05, "loss": 3.4693, "step": 27800 }, { "epoch": 0.71, "learning_rate": 4.761758376882878e-05, "loss": 3.6151, "step": 27900 }, { "epoch": 0.72, "learning_rate": 4.760904464255218e-05, "loss": 3.4634, "step": 28000 }, { "epoch": 0.72, "learning_rate": 4.760050551627558e-05, "loss": 3.2379, "step": 28100 }, { "epoch": 0.72, "learning_rate": 4.7591966389998975e-05, "loss": 3.5468, "step": 28200 }, { "epoch": 0.72, "learning_rate": 4.758342726372238e-05, "loss": 3.3572, "step": 28300 }, { "epoch": 0.73, "learning_rate": 4.757488813744578e-05, "loss": 3.4741, "step": 28400 }, { "epoch": 0.73, "learning_rate": 4.756634901116918e-05, "loss": 3.5941, "step": 28500 }, { "epoch": 0.73, "learning_rate": 4.755780988489258e-05, "loss": 3.2845, "step": 28600 }, { "epoch": 0.74, "learning_rate": 4.754927075861598e-05, "loss": 3.3763, "step": 28700 }, { "epoch": 0.74, "learning_rate": 4.754073163233938e-05, "loss": 3.4421, "step": 28800 }, { "epoch": 0.74, "learning_rate": 4.7532192506062786e-05, "loss": 3.3501, "step": 28900 }, { "epoch": 0.74, "learning_rate": 4.752365337978618e-05, "loss": 3.4032, "step": 29000 }, { "epoch": 0.75, "learning_rate": 4.751511425350958e-05, "loss": 3.1186, "step": 29100 }, { "epoch": 0.75, "learning_rate": 4.7506575127232983e-05, "loss": 3.5095, "step": 29200 }, { "epoch": 0.75, "learning_rate": 4.7498036000956385e-05, "loss": 3.1795, "step": 29300 }, { "epoch": 0.75, "learning_rate": 4.7489496874679787e-05, "loss": 3.5747, "step": 29400 }, { "epoch": 0.76, "learning_rate": 4.748095774840319e-05, "loss": 3.6188, "step": 29500 }, { "epoch": 0.76, "learning_rate": 4.747241862212658e-05, "loss": 3.3566, "step": 29600 }, { "epoch": 0.76, "learning_rate": 4.746387949584999e-05, "loss": 3.547, "step": 29700 }, { "epoch": 0.76, "learning_rate": 4.7455340369573386e-05, "loss": 3.1518, "step": 29800 }, { "epoch": 0.77, "learning_rate": 4.744680124329679e-05, "loss": 3.466, "step": 29900 }, { "epoch": 0.77, "learning_rate": 4.743826211702019e-05, "loss": 3.2771, "step": 30000 }, { "epoch": 0.77, "learning_rate": 4.742972299074359e-05, "loss": 3.5729, "step": 30100 }, { "epoch": 0.77, "learning_rate": 4.742118386446699e-05, "loss": 3.4516, "step": 30200 }, { "epoch": 0.78, "learning_rate": 4.7412644738190394e-05, "loss": 3.3993, "step": 30300 }, { "epoch": 0.78, "learning_rate": 4.740410561191379e-05, "loss": 3.4406, "step": 30400 }, { "epoch": 0.78, "learning_rate": 4.73955664856372e-05, "loss": 3.5511, "step": 30500 }, { "epoch": 0.78, "learning_rate": 4.738702735936059e-05, "loss": 3.2725, "step": 30600 }, { "epoch": 0.79, "learning_rate": 4.737848823308399e-05, "loss": 3.4044, "step": 30700 }, { "epoch": 0.79, "learning_rate": 4.7369949106807395e-05, "loss": 3.6242, "step": 30800 }, { "epoch": 0.79, "learning_rate": 4.7361409980530796e-05, "loss": 3.6762, "step": 30900 }, { "epoch": 0.79, "learning_rate": 4.735287085425419e-05, "loss": 3.4278, "step": 31000 }, { "epoch": 0.8, "learning_rate": 4.73443317279776e-05, "loss": 3.3284, "step": 31100 }, { "epoch": 0.8, "learning_rate": 4.7335792601700994e-05, "loss": 3.5034, "step": 31200 }, { "epoch": 0.8, "learning_rate": 4.73272534754244e-05, "loss": 3.4058, "step": 31300 }, { "epoch": 0.8, "learning_rate": 4.73187143491478e-05, "loss": 3.3646, "step": 31400 }, { "epoch": 0.81, "learning_rate": 4.73101752228712e-05, "loss": 3.4112, "step": 31500 }, { "epoch": 0.81, "learning_rate": 4.73016360965946e-05, "loss": 3.475, "step": 31600 }, { "epoch": 0.81, "learning_rate": 4.7293096970318e-05, "loss": 3.6318, "step": 31700 }, { "epoch": 0.81, "learning_rate": 4.7284557844041397e-05, "loss": 3.2754, "step": 31800 }, { "epoch": 0.82, "learning_rate": 4.7276018717764805e-05, "loss": 3.3146, "step": 31900 }, { "epoch": 0.82, "learning_rate": 4.72674795914882e-05, "loss": 3.37, "step": 32000 }, { "epoch": 0.82, "learning_rate": 4.72589404652116e-05, "loss": 3.4869, "step": 32100 }, { "epoch": 0.82, "learning_rate": 4.7250401338935e-05, "loss": 3.4892, "step": 32200 }, { "epoch": 0.83, "learning_rate": 4.7241862212658404e-05, "loss": 3.217, "step": 32300 }, { "epoch": 0.83, "learning_rate": 4.7233323086381806e-05, "loss": 3.4166, "step": 32400 }, { "epoch": 0.83, "learning_rate": 4.722478396010521e-05, "loss": 3.4837, "step": 32500 }, { "epoch": 0.84, "learning_rate": 4.72162448338286e-05, "loss": 3.399, "step": 32600 }, { "epoch": 0.84, "learning_rate": 4.720770570755201e-05, "loss": 3.6814, "step": 32700 }, { "epoch": 0.84, "learning_rate": 4.7199166581275405e-05, "loss": 3.5863, "step": 32800 }, { "epoch": 0.84, "learning_rate": 4.719062745499881e-05, "loss": 3.3289, "step": 32900 }, { "epoch": 0.85, "learning_rate": 4.718208832872221e-05, "loss": 3.2249, "step": 33000 }, { "epoch": 0.85, "learning_rate": 4.717354920244561e-05, "loss": 3.6567, "step": 33100 }, { "epoch": 0.85, "learning_rate": 4.716501007616901e-05, "loss": 3.471, "step": 33200 }, { "epoch": 0.85, "learning_rate": 4.715647094989241e-05, "loss": 3.2949, "step": 33300 }, { "epoch": 0.86, "learning_rate": 4.714793182361581e-05, "loss": 3.3271, "step": 33400 }, { "epoch": 0.86, "learning_rate": 4.7139392697339216e-05, "loss": 3.2935, "step": 33500 }, { "epoch": 0.86, "learning_rate": 4.713085357106261e-05, "loss": 3.4567, "step": 33600 }, { "epoch": 0.86, "learning_rate": 4.712231444478601e-05, "loss": 3.5018, "step": 33700 }, { "epoch": 0.87, "learning_rate": 4.7113775318509414e-05, "loss": 3.462, "step": 33800 }, { "epoch": 0.87, "learning_rate": 4.7105236192232816e-05, "loss": 3.4016, "step": 33900 }, { "epoch": 0.87, "learning_rate": 4.709669706595621e-05, "loss": 3.3442, "step": 34000 }, { "epoch": 0.87, "learning_rate": 4.708815793967962e-05, "loss": 3.4873, "step": 34100 }, { "epoch": 0.88, "learning_rate": 4.7079618813403013e-05, "loss": 3.3928, "step": 34200 }, { "epoch": 0.88, "learning_rate": 4.7071079687126415e-05, "loss": 3.6974, "step": 34300 }, { "epoch": 0.88, "learning_rate": 4.7062540560849816e-05, "loss": 3.316, "step": 34400 }, { "epoch": 0.88, "learning_rate": 4.705400143457322e-05, "loss": 3.3236, "step": 34500 }, { "epoch": 0.89, "learning_rate": 4.704546230829662e-05, "loss": 3.53, "step": 34600 }, { "epoch": 0.89, "learning_rate": 4.703692318202002e-05, "loss": 3.5239, "step": 34700 }, { "epoch": 0.89, "learning_rate": 4.7028384055743416e-05, "loss": 3.1612, "step": 34800 }, { "epoch": 0.89, "learning_rate": 4.7019844929466824e-05, "loss": 3.3604, "step": 34900 }, { "epoch": 0.9, "learning_rate": 4.701130580319022e-05, "loss": 3.259, "step": 35000 }, { "epoch": 0.9, "learning_rate": 4.700276667691362e-05, "loss": 3.3522, "step": 35100 }, { "epoch": 0.9, "learning_rate": 4.699422755063702e-05, "loss": 3.3189, "step": 35200 }, { "epoch": 0.9, "learning_rate": 4.698568842436042e-05, "loss": 3.457, "step": 35300 }, { "epoch": 0.91, "learning_rate": 4.6977149298083825e-05, "loss": 3.3757, "step": 35400 }, { "epoch": 0.91, "learning_rate": 4.696861017180722e-05, "loss": 3.3143, "step": 35500 }, { "epoch": 0.91, "learning_rate": 4.696007104553062e-05, "loss": 3.329, "step": 35600 }, { "epoch": 0.91, "learning_rate": 4.695153191925402e-05, "loss": 3.4637, "step": 35700 }, { "epoch": 0.92, "learning_rate": 4.6942992792977425e-05, "loss": 3.3483, "step": 35800 }, { "epoch": 0.92, "learning_rate": 4.693445366670082e-05, "loss": 3.4662, "step": 35900 }, { "epoch": 0.92, "learning_rate": 4.692591454042423e-05, "loss": 3.2127, "step": 36000 }, { "epoch": 0.92, "learning_rate": 4.691737541414762e-05, "loss": 3.2416, "step": 36100 }, { "epoch": 0.93, "learning_rate": 4.690883628787103e-05, "loss": 3.1665, "step": 36200 }, { "epoch": 0.93, "learning_rate": 4.6900297161594426e-05, "loss": 3.371, "step": 36300 }, { "epoch": 0.93, "learning_rate": 4.689175803531783e-05, "loss": 3.2884, "step": 36400 }, { "epoch": 0.94, "learning_rate": 4.688321890904123e-05, "loss": 3.3204, "step": 36500 }, { "epoch": 0.94, "learning_rate": 4.687467978276463e-05, "loss": 3.2915, "step": 36600 }, { "epoch": 0.94, "learning_rate": 4.6866140656488025e-05, "loss": 3.2848, "step": 36700 }, { "epoch": 0.94, "learning_rate": 4.685760153021143e-05, "loss": 3.2604, "step": 36800 }, { "epoch": 0.95, "learning_rate": 4.684906240393483e-05, "loss": 3.1758, "step": 36900 }, { "epoch": 0.95, "learning_rate": 4.684052327765823e-05, "loss": 3.3302, "step": 37000 }, { "epoch": 0.95, "learning_rate": 4.683198415138163e-05, "loss": 3.5299, "step": 37100 }, { "epoch": 0.95, "learning_rate": 4.682344502510503e-05, "loss": 3.3508, "step": 37200 }, { "epoch": 0.96, "learning_rate": 4.6814905898828434e-05, "loss": 3.3845, "step": 37300 }, { "epoch": 0.96, "learning_rate": 4.6806366772551836e-05, "loss": 3.4054, "step": 37400 }, { "epoch": 0.96, "learning_rate": 4.679782764627523e-05, "loss": 3.4561, "step": 37500 }, { "epoch": 0.96, "learning_rate": 4.678928851999864e-05, "loss": 3.4069, "step": 37600 }, { "epoch": 0.97, "learning_rate": 4.6780749393722034e-05, "loss": 3.2533, "step": 37700 }, { "epoch": 0.97, "learning_rate": 4.6772210267445435e-05, "loss": 3.4234, "step": 37800 }, { "epoch": 0.97, "learning_rate": 4.676367114116884e-05, "loss": 3.1182, "step": 37900 }, { "epoch": 0.97, "learning_rate": 4.675513201489224e-05, "loss": 3.2555, "step": 38000 }, { "epoch": 0.98, "learning_rate": 4.674659288861564e-05, "loss": 3.5934, "step": 38100 }, { "epoch": 0.98, "learning_rate": 4.673805376233904e-05, "loss": 3.3511, "step": 38200 }, { "epoch": 0.98, "learning_rate": 4.6729514636062436e-05, "loss": 3.3294, "step": 38300 }, { "epoch": 0.98, "learning_rate": 4.6720975509785845e-05, "loss": 3.2316, "step": 38400 }, { "epoch": 0.99, "learning_rate": 4.671243638350924e-05, "loss": 3.2372, "step": 38500 }, { "epoch": 0.99, "learning_rate": 4.670389725723264e-05, "loss": 3.43, "step": 38600 }, { "epoch": 0.99, "learning_rate": 4.669535813095604e-05, "loss": 3.2801, "step": 38700 }, { "epoch": 0.99, "learning_rate": 4.6686819004679444e-05, "loss": 3.4245, "step": 38800 }, { "epoch": 1.0, "learning_rate": 4.667827987840284e-05, "loss": 3.3985, "step": 38900 }, { "epoch": 1.0, "learning_rate": 4.666974075212625e-05, "loss": 3.2582, "step": 39000 }, { "epoch": 1.0, "learning_rate": 4.666120162584964e-05, "loss": 3.445, "step": 39100 }, { "epoch": 1.0, "learning_rate": 4.665266249957304e-05, "loss": 3.3439, "step": 39200 }, { "epoch": 1.01, "learning_rate": 4.6644123373296445e-05, "loss": 3.3569, "step": 39300 }, { "epoch": 1.01, "learning_rate": 4.6635584247019846e-05, "loss": 3.2744, "step": 39400 }, { "epoch": 1.01, "learning_rate": 4.662704512074325e-05, "loss": 3.4693, "step": 39500 }, { "epoch": 1.01, "learning_rate": 4.661850599446665e-05, "loss": 3.2883, "step": 39600 }, { "epoch": 1.02, "learning_rate": 4.6609966868190044e-05, "loss": 3.3045, "step": 39700 }, { "epoch": 1.02, "learning_rate": 4.660142774191345e-05, "loss": 3.4343, "step": 39800 }, { "epoch": 1.02, "learning_rate": 4.659288861563685e-05, "loss": 3.2372, "step": 39900 }, { "epoch": 1.02, "learning_rate": 4.658434948936025e-05, "loss": 3.4615, "step": 40000 }, { "epoch": 1.03, "learning_rate": 4.657581036308365e-05, "loss": 3.4103, "step": 40100 }, { "epoch": 1.03, "learning_rate": 4.656727123680705e-05, "loss": 3.5328, "step": 40200 }, { "epoch": 1.03, "learning_rate": 4.6558732110530454e-05, "loss": 3.4379, "step": 40300 }, { "epoch": 1.03, "learning_rate": 4.6550192984253855e-05, "loss": 3.3792, "step": 40400 }, { "epoch": 1.04, "learning_rate": 4.654165385797725e-05, "loss": 3.5508, "step": 40500 }, { "epoch": 1.04, "learning_rate": 4.653311473170066e-05, "loss": 3.3502, "step": 40600 }, { "epoch": 1.04, "learning_rate": 4.652457560542405e-05, "loss": 3.4316, "step": 40700 }, { "epoch": 1.05, "learning_rate": 4.6516036479147455e-05, "loss": 3.1812, "step": 40800 }, { "epoch": 1.05, "learning_rate": 4.6507497352870856e-05, "loss": 3.348, "step": 40900 }, { "epoch": 1.05, "learning_rate": 4.649895822659426e-05, "loss": 3.3069, "step": 41000 }, { "epoch": 1.05, "learning_rate": 4.649041910031766e-05, "loss": 3.2655, "step": 41100 }, { "epoch": 1.06, "learning_rate": 4.648187997404106e-05, "loss": 3.1583, "step": 41200 }, { "epoch": 1.06, "learning_rate": 4.6473340847764456e-05, "loss": 3.3218, "step": 41300 }, { "epoch": 1.06, "learning_rate": 4.6464801721487864e-05, "loss": 3.4447, "step": 41400 }, { "epoch": 1.06, "learning_rate": 4.645626259521126e-05, "loss": 3.2127, "step": 41500 }, { "epoch": 1.07, "learning_rate": 4.644772346893466e-05, "loss": 3.1546, "step": 41600 }, { "epoch": 1.07, "learning_rate": 4.643918434265806e-05, "loss": 3.5458, "step": 41700 }, { "epoch": 1.07, "learning_rate": 4.643064521638146e-05, "loss": 3.3776, "step": 41800 }, { "epoch": 1.07, "learning_rate": 4.642210609010486e-05, "loss": 3.3053, "step": 41900 }, { "epoch": 1.08, "learning_rate": 4.6413566963828266e-05, "loss": 3.4147, "step": 42000 }, { "epoch": 1.08, "learning_rate": 4.640502783755166e-05, "loss": 3.308, "step": 42100 }, { "epoch": 1.08, "learning_rate": 4.639648871127506e-05, "loss": 3.1021, "step": 42200 }, { "epoch": 1.08, "learning_rate": 4.6387949584998464e-05, "loss": 3.4419, "step": 42300 }, { "epoch": 1.09, "learning_rate": 4.6379410458721866e-05, "loss": 3.1076, "step": 42400 }, { "epoch": 1.09, "learning_rate": 4.637087133244527e-05, "loss": 3.2782, "step": 42500 }, { "epoch": 1.09, "learning_rate": 4.636233220616867e-05, "loss": 3.43, "step": 42600 }, { "epoch": 1.09, "learning_rate": 4.6353793079892064e-05, "loss": 3.4279, "step": 42700 }, { "epoch": 1.1, "learning_rate": 4.634525395361547e-05, "loss": 3.2666, "step": 42800 }, { "epoch": 1.1, "learning_rate": 4.633671482733887e-05, "loss": 3.3548, "step": 42900 }, { "epoch": 1.1, "learning_rate": 4.632817570106227e-05, "loss": 3.378, "step": 43000 }, { "epoch": 1.1, "learning_rate": 4.631963657478567e-05, "loss": 3.2853, "step": 43100 }, { "epoch": 1.11, "learning_rate": 4.631109744850907e-05, "loss": 3.3193, "step": 43200 }, { "epoch": 1.11, "learning_rate": 4.630255832223247e-05, "loss": 3.4065, "step": 43300 }, { "epoch": 1.11, "learning_rate": 4.6294019195955875e-05, "loss": 3.2967, "step": 43400 }, { "epoch": 1.11, "learning_rate": 4.628548006967927e-05, "loss": 3.3564, "step": 43500 }, { "epoch": 1.12, "learning_rate": 4.627694094340268e-05, "loss": 3.4165, "step": 43600 }, { "epoch": 1.12, "learning_rate": 4.626840181712607e-05, "loss": 3.2119, "step": 43700 }, { "epoch": 1.12, "learning_rate": 4.6259862690849474e-05, "loss": 3.2635, "step": 43800 }, { "epoch": 1.12, "learning_rate": 4.6251323564572875e-05, "loss": 3.2283, "step": 43900 }, { "epoch": 1.13, "learning_rate": 4.624278443829628e-05, "loss": 3.2392, "step": 44000 }, { "epoch": 1.13, "learning_rate": 4.623424531201967e-05, "loss": 3.274, "step": 44100 }, { "epoch": 1.13, "learning_rate": 4.622570618574308e-05, "loss": 3.1144, "step": 44200 }, { "epoch": 1.13, "learning_rate": 4.6217167059466475e-05, "loss": 3.2448, "step": 44300 }, { "epoch": 1.14, "learning_rate": 4.6208627933189876e-05, "loss": 3.4071, "step": 44400 }, { "epoch": 1.14, "learning_rate": 4.620008880691328e-05, "loss": 3.2883, "step": 44500 }, { "epoch": 1.14, "learning_rate": 4.619154968063668e-05, "loss": 3.3453, "step": 44600 }, { "epoch": 1.15, "learning_rate": 4.618301055436008e-05, "loss": 3.237, "step": 44700 }, { "epoch": 1.15, "learning_rate": 4.617447142808348e-05, "loss": 3.4076, "step": 44800 }, { "epoch": 1.15, "learning_rate": 4.616593230180688e-05, "loss": 3.2343, "step": 44900 }, { "epoch": 1.15, "learning_rate": 4.6157393175530286e-05, "loss": 3.0996, "step": 45000 }, { "epoch": 1.16, "learning_rate": 4.614885404925368e-05, "loss": 3.2876, "step": 45100 }, { "epoch": 1.16, "learning_rate": 4.614031492297708e-05, "loss": 3.3522, "step": 45200 }, { "epoch": 1.16, "learning_rate": 4.6131775796700484e-05, "loss": 3.274, "step": 45300 }, { "epoch": 1.16, "learning_rate": 4.6123236670423885e-05, "loss": 3.168, "step": 45400 }, { "epoch": 1.17, "learning_rate": 4.611469754414729e-05, "loss": 3.3319, "step": 45500 }, { "epoch": 1.17, "learning_rate": 4.610615841787069e-05, "loss": 3.2365, "step": 45600 }, { "epoch": 1.17, "learning_rate": 4.609761929159408e-05, "loss": 3.208, "step": 45700 }, { "epoch": 1.17, "learning_rate": 4.608908016531749e-05, "loss": 3.0745, "step": 45800 }, { "epoch": 1.18, "learning_rate": 4.6080541039040886e-05, "loss": 3.1878, "step": 45900 }, { "epoch": 1.18, "learning_rate": 4.607200191276429e-05, "loss": 3.3813, "step": 46000 }, { "epoch": 1.18, "learning_rate": 4.606346278648769e-05, "loss": 3.2259, "step": 46100 }, { "epoch": 1.18, "learning_rate": 4.605492366021109e-05, "loss": 3.355, "step": 46200 }, { "epoch": 1.19, "learning_rate": 4.604638453393449e-05, "loss": 3.3909, "step": 46300 }, { "epoch": 1.19, "learning_rate": 4.6037845407657894e-05, "loss": 3.2687, "step": 46400 }, { "epoch": 1.19, "learning_rate": 4.602930628138129e-05, "loss": 3.2326, "step": 46500 }, { "epoch": 1.19, "learning_rate": 4.60207671551047e-05, "loss": 3.3135, "step": 46600 }, { "epoch": 1.2, "learning_rate": 4.601222802882809e-05, "loss": 3.5051, "step": 46700 }, { "epoch": 1.2, "learning_rate": 4.600368890255149e-05, "loss": 3.009, "step": 46800 }, { "epoch": 1.2, "learning_rate": 4.5995149776274895e-05, "loss": 3.1782, "step": 46900 }, { "epoch": 1.2, "learning_rate": 4.5986610649998296e-05, "loss": 3.0154, "step": 47000 }, { "epoch": 1.21, "learning_rate": 4.597807152372169e-05, "loss": 3.3357, "step": 47100 }, { "epoch": 1.21, "learning_rate": 4.59695323974451e-05, "loss": 3.2224, "step": 47200 }, { "epoch": 1.21, "learning_rate": 4.5960993271168494e-05, "loss": 3.1395, "step": 47300 }, { "epoch": 1.21, "learning_rate": 4.5952454144891896e-05, "loss": 3.4303, "step": 47400 }, { "epoch": 1.22, "learning_rate": 4.59439150186153e-05, "loss": 3.1074, "step": 47500 }, { "epoch": 1.22, "learning_rate": 4.59353758923387e-05, "loss": 3.3658, "step": 47600 }, { "epoch": 1.22, "learning_rate": 4.59268367660621e-05, "loss": 3.3173, "step": 47700 }, { "epoch": 1.22, "learning_rate": 4.59182976397855e-05, "loss": 2.9707, "step": 47800 }, { "epoch": 1.23, "learning_rate": 4.59097585135089e-05, "loss": 3.3756, "step": 47900 }, { "epoch": 1.23, "learning_rate": 4.5901219387232305e-05, "loss": 3.0612, "step": 48000 }, { "epoch": 1.23, "learning_rate": 4.58926802609557e-05, "loss": 3.1551, "step": 48100 }, { "epoch": 1.23, "learning_rate": 4.58841411346791e-05, "loss": 3.3508, "step": 48200 }, { "epoch": 1.24, "learning_rate": 4.58756020084025e-05, "loss": 3.3484, "step": 48300 }, { "epoch": 1.24, "learning_rate": 4.5867062882125904e-05, "loss": 3.2109, "step": 48400 }, { "epoch": 1.24, "learning_rate": 4.5858523755849306e-05, "loss": 3.2551, "step": 48500 }, { "epoch": 1.25, "learning_rate": 4.584998462957271e-05, "loss": 3.1801, "step": 48600 }, { "epoch": 1.25, "learning_rate": 4.58414455032961e-05, "loss": 3.2943, "step": 48700 }, { "epoch": 1.25, "learning_rate": 4.583290637701951e-05, "loss": 3.1567, "step": 48800 }, { "epoch": 1.25, "learning_rate": 4.5824367250742905e-05, "loss": 3.6534, "step": 48900 }, { "epoch": 1.26, "learning_rate": 4.581582812446631e-05, "loss": 3.2714, "step": 49000 }, { "epoch": 1.26, "learning_rate": 4.580728899818971e-05, "loss": 3.2884, "step": 49100 }, { "epoch": 1.26, "learning_rate": 4.579874987191311e-05, "loss": 3.0826, "step": 49200 }, { "epoch": 1.26, "learning_rate": 4.5790210745636505e-05, "loss": 3.2807, "step": 49300 }, { "epoch": 1.27, "learning_rate": 4.578167161935991e-05, "loss": 3.3775, "step": 49400 }, { "epoch": 1.27, "learning_rate": 4.577313249308331e-05, "loss": 3.0698, "step": 49500 }, { "epoch": 1.27, "learning_rate": 4.576459336680671e-05, "loss": 3.299, "step": 49600 }, { "epoch": 1.27, "learning_rate": 4.575605424053011e-05, "loss": 3.1239, "step": 49700 }, { "epoch": 1.28, "learning_rate": 4.574751511425351e-05, "loss": 3.2043, "step": 49800 }, { "epoch": 1.28, "learning_rate": 4.5738975987976914e-05, "loss": 3.3373, "step": 49900 }, { "epoch": 1.28, "learning_rate": 4.5730436861700316e-05, "loss": 3.6232, "step": 50000 }, { "epoch": 1.28, "learning_rate": 4.572189773542371e-05, "loss": 3.1563, "step": 50100 }, { "epoch": 1.29, "learning_rate": 4.571335860914712e-05, "loss": 3.2558, "step": 50200 }, { "epoch": 1.29, "learning_rate": 4.5704819482870514e-05, "loss": 3.2061, "step": 50300 }, { "epoch": 1.29, "learning_rate": 4.5696280356593915e-05, "loss": 3.1413, "step": 50400 }, { "epoch": 1.29, "learning_rate": 4.568774123031732e-05, "loss": 3.1439, "step": 50500 }, { "epoch": 1.3, "learning_rate": 4.567920210404072e-05, "loss": 3.4335, "step": 50600 }, { "epoch": 1.3, "learning_rate": 4.567066297776412e-05, "loss": 3.3769, "step": 50700 }, { "epoch": 1.3, "learning_rate": 4.566212385148752e-05, "loss": 3.2077, "step": 50800 }, { "epoch": 1.3, "learning_rate": 4.5653584725210916e-05, "loss": 3.2767, "step": 50900 }, { "epoch": 1.31, "learning_rate": 4.5645045598934324e-05, "loss": 3.3502, "step": 51000 }, { "epoch": 1.31, "learning_rate": 4.563650647265772e-05, "loss": 3.2704, "step": 51100 }, { "epoch": 1.31, "learning_rate": 4.562796734638112e-05, "loss": 3.3187, "step": 51200 }, { "epoch": 1.31, "learning_rate": 4.561942822010452e-05, "loss": 3.1367, "step": 51300 }, { "epoch": 1.32, "learning_rate": 4.5610889093827924e-05, "loss": 3.3311, "step": 51400 }, { "epoch": 1.32, "learning_rate": 4.5602349967551325e-05, "loss": 3.2221, "step": 51500 }, { "epoch": 1.32, "learning_rate": 4.559381084127473e-05, "loss": 3.3227, "step": 51600 }, { "epoch": 1.32, "learning_rate": 4.558527171499812e-05, "loss": 3.2998, "step": 51700 }, { "epoch": 1.33, "learning_rate": 4.557673258872153e-05, "loss": 3.3506, "step": 51800 }, { "epoch": 1.33, "learning_rate": 4.5568193462444925e-05, "loss": 3.3649, "step": 51900 }, { "epoch": 1.33, "learning_rate": 4.5559654336168326e-05, "loss": 3.3714, "step": 52000 }, { "epoch": 1.33, "learning_rate": 4.555111520989173e-05, "loss": 3.0725, "step": 52100 }, { "epoch": 1.34, "learning_rate": 4.554257608361513e-05, "loss": 3.2297, "step": 52200 }, { "epoch": 1.34, "learning_rate": 4.5534036957338524e-05, "loss": 3.3677, "step": 52300 }, { "epoch": 1.34, "learning_rate": 4.552549783106193e-05, "loss": 3.2325, "step": 52400 }, { "epoch": 1.34, "learning_rate": 4.551695870478533e-05, "loss": 3.142, "step": 52500 }, { "epoch": 1.35, "learning_rate": 4.550841957850873e-05, "loss": 3.1671, "step": 52600 }, { "epoch": 1.35, "learning_rate": 4.549988045223213e-05, "loss": 3.285, "step": 52700 }, { "epoch": 1.35, "learning_rate": 4.5491341325955525e-05, "loss": 3.0934, "step": 52800 }, { "epoch": 1.36, "learning_rate": 4.5482802199678934e-05, "loss": 3.3774, "step": 52900 }, { "epoch": 1.36, "learning_rate": 4.547426307340233e-05, "loss": 3.4387, "step": 53000 }, { "epoch": 1.36, "learning_rate": 4.546572394712573e-05, "loss": 3.2799, "step": 53100 }, { "epoch": 1.36, "learning_rate": 4.545718482084913e-05, "loss": 3.4177, "step": 53200 }, { "epoch": 1.37, "learning_rate": 4.544864569457253e-05, "loss": 3.248, "step": 53300 }, { "epoch": 1.37, "learning_rate": 4.5440106568295934e-05, "loss": 3.2275, "step": 53400 }, { "epoch": 1.37, "learning_rate": 4.5431567442019336e-05, "loss": 3.1949, "step": 53500 }, { "epoch": 1.37, "learning_rate": 4.542302831574273e-05, "loss": 3.3461, "step": 53600 }, { "epoch": 1.38, "learning_rate": 4.541448918946614e-05, "loss": 3.0244, "step": 53700 }, { "epoch": 1.38, "learning_rate": 4.5405950063189534e-05, "loss": 3.3828, "step": 53800 }, { "epoch": 1.38, "learning_rate": 4.5397410936912935e-05, "loss": 3.2586, "step": 53900 }, { "epoch": 1.38, "learning_rate": 4.538887181063634e-05, "loss": 3.2267, "step": 54000 }, { "epoch": 1.39, "learning_rate": 4.538033268435974e-05, "loss": 3.0411, "step": 54100 }, { "epoch": 1.39, "learning_rate": 4.537179355808313e-05, "loss": 3.4893, "step": 54200 }, { "epoch": 1.39, "learning_rate": 4.536325443180654e-05, "loss": 3.1938, "step": 54300 }, { "epoch": 1.39, "learning_rate": 4.5354715305529936e-05, "loss": 3.0352, "step": 54400 }, { "epoch": 1.4, "learning_rate": 4.534617617925334e-05, "loss": 3.1959, "step": 54500 }, { "epoch": 1.4, "learning_rate": 4.533763705297674e-05, "loss": 3.2033, "step": 54600 }, { "epoch": 1.4, "learning_rate": 4.532909792670014e-05, "loss": 3.2207, "step": 54700 }, { "epoch": 1.4, "learning_rate": 4.532055880042354e-05, "loss": 3.1523, "step": 54800 }, { "epoch": 1.41, "learning_rate": 4.5312019674146944e-05, "loss": 3.099, "step": 54900 }, { "epoch": 1.41, "learning_rate": 4.530348054787034e-05, "loss": 3.3705, "step": 55000 }, { "epoch": 1.41, "learning_rate": 4.529494142159375e-05, "loss": 3.1773, "step": 55100 }, { "epoch": 1.41, "learning_rate": 4.528640229531714e-05, "loss": 3.076, "step": 55200 }, { "epoch": 1.42, "learning_rate": 4.5277863169040544e-05, "loss": 3.1966, "step": 55300 }, { "epoch": 1.42, "learning_rate": 4.5269324042763945e-05, "loss": 3.1723, "step": 55400 }, { "epoch": 1.42, "learning_rate": 4.526078491648735e-05, "loss": 3.3688, "step": 55500 }, { "epoch": 1.42, "learning_rate": 4.525224579021075e-05, "loss": 3.241, "step": 55600 }, { "epoch": 1.43, "learning_rate": 4.524370666393415e-05, "loss": 3.3047, "step": 55700 }, { "epoch": 1.43, "learning_rate": 4.5235167537657545e-05, "loss": 2.9904, "step": 55800 }, { "epoch": 1.43, "learning_rate": 4.522662841138095e-05, "loss": 3.1969, "step": 55900 }, { "epoch": 1.43, "learning_rate": 4.521808928510435e-05, "loss": 3.1953, "step": 56000 }, { "epoch": 1.44, "learning_rate": 4.520955015882775e-05, "loss": 3.1532, "step": 56100 }, { "epoch": 1.44, "learning_rate": 4.520101103255115e-05, "loss": 3.2045, "step": 56200 }, { "epoch": 1.44, "learning_rate": 4.519247190627455e-05, "loss": 3.2981, "step": 56300 }, { "epoch": 1.44, "learning_rate": 4.5183932779997954e-05, "loss": 3.3037, "step": 56400 }, { "epoch": 1.45, "learning_rate": 4.5175393653721355e-05, "loss": 3.1982, "step": 56500 }, { "epoch": 1.45, "learning_rate": 4.516685452744475e-05, "loss": 3.2365, "step": 56600 }, { "epoch": 1.45, "learning_rate": 4.515831540116816e-05, "loss": 3.2118, "step": 56700 }, { "epoch": 1.46, "learning_rate": 4.514977627489155e-05, "loss": 3.2382, "step": 56800 }, { "epoch": 1.46, "learning_rate": 4.5141237148614955e-05, "loss": 3.1484, "step": 56900 }, { "epoch": 1.46, "learning_rate": 4.5132698022338356e-05, "loss": 3.255, "step": 57000 }, { "epoch": 1.46, "learning_rate": 4.512415889606176e-05, "loss": 3.3042, "step": 57100 }, { "epoch": 1.47, "learning_rate": 4.511561976978515e-05, "loss": 3.1508, "step": 57200 }, { "epoch": 1.47, "learning_rate": 4.510708064350856e-05, "loss": 3.1394, "step": 57300 }, { "epoch": 1.47, "learning_rate": 4.5098541517231956e-05, "loss": 3.1828, "step": 57400 }, { "epoch": 1.47, "learning_rate": 4.509000239095536e-05, "loss": 3.2893, "step": 57500 }, { "epoch": 1.48, "learning_rate": 4.508146326467876e-05, "loss": 2.9791, "step": 57600 }, { "epoch": 1.48, "learning_rate": 4.507292413840216e-05, "loss": 3.1324, "step": 57700 }, { "epoch": 1.48, "learning_rate": 4.506438501212556e-05, "loss": 3.1465, "step": 57800 }, { "epoch": 1.48, "learning_rate": 4.5055845885848963e-05, "loss": 3.172, "step": 57900 }, { "epoch": 1.49, "learning_rate": 4.504730675957236e-05, "loss": 3.2903, "step": 58000 }, { "epoch": 1.49, "learning_rate": 4.5038767633295767e-05, "loss": 3.2377, "step": 58100 }, { "epoch": 1.49, "learning_rate": 4.503022850701916e-05, "loss": 3.0679, "step": 58200 }, { "epoch": 1.49, "learning_rate": 4.502168938074256e-05, "loss": 3.3194, "step": 58300 }, { "epoch": 1.5, "learning_rate": 4.5013150254465964e-05, "loss": 3.2289, "step": 58400 }, { "epoch": 1.5, "learning_rate": 4.5004611128189366e-05, "loss": 3.1449, "step": 58500 }, { "epoch": 1.5, "learning_rate": 4.499607200191277e-05, "loss": 3.3433, "step": 58600 }, { "epoch": 1.5, "learning_rate": 4.498753287563617e-05, "loss": 3.3692, "step": 58700 }, { "epoch": 1.51, "learning_rate": 4.4978993749359564e-05, "loss": 3.2493, "step": 58800 }, { "epoch": 1.51, "learning_rate": 4.497045462308297e-05, "loss": 3.2536, "step": 58900 }, { "epoch": 1.51, "learning_rate": 4.496191549680637e-05, "loss": 3.2131, "step": 59000 }, { "epoch": 1.51, "learning_rate": 4.495337637052977e-05, "loss": 2.9688, "step": 59100 }, { "epoch": 1.52, "learning_rate": 4.494483724425317e-05, "loss": 3.3731, "step": 59200 }, { "epoch": 1.52, "learning_rate": 4.493629811797657e-05, "loss": 3.2793, "step": 59300 }, { "epoch": 1.52, "learning_rate": 4.4927758991699966e-05, "loss": 3.1691, "step": 59400 }, { "epoch": 1.52, "learning_rate": 4.4919219865423375e-05, "loss": 3.3579, "step": 59500 }, { "epoch": 1.53, "learning_rate": 4.491068073914677e-05, "loss": 3.3206, "step": 59600 }, { "epoch": 1.53, "learning_rate": 4.490214161287017e-05, "loss": 3.1517, "step": 59700 }, { "epoch": 1.53, "learning_rate": 4.489360248659357e-05, "loss": 3.3422, "step": 59800 }, { "epoch": 1.53, "learning_rate": 4.4885063360316974e-05, "loss": 3.2501, "step": 59900 }, { "epoch": 1.54, "learning_rate": 4.4876524234040376e-05, "loss": 3.0996, "step": 60000 }, { "epoch": 1.54, "learning_rate": 4.486798510776378e-05, "loss": 3.4043, "step": 60100 }, { "epoch": 1.54, "learning_rate": 4.485944598148717e-05, "loss": 3.2396, "step": 60200 }, { "epoch": 1.54, "learning_rate": 4.485090685521058e-05, "loss": 3.0377, "step": 60300 }, { "epoch": 1.55, "learning_rate": 4.4842367728933975e-05, "loss": 3.2877, "step": 60400 }, { "epoch": 1.55, "learning_rate": 4.483382860265738e-05, "loss": 3.2888, "step": 60500 }, { "epoch": 1.55, "learning_rate": 4.482528947638078e-05, "loss": 3.2973, "step": 60600 }, { "epoch": 1.55, "learning_rate": 4.481675035010418e-05, "loss": 3.2159, "step": 60700 }, { "epoch": 1.56, "learning_rate": 4.480821122382758e-05, "loss": 3.3014, "step": 60800 }, { "epoch": 1.56, "learning_rate": 4.479967209755098e-05, "loss": 3.0788, "step": 60900 }, { "epoch": 1.56, "learning_rate": 4.479113297127438e-05, "loss": 3.1098, "step": 61000 }, { "epoch": 1.57, "learning_rate": 4.4782593844997786e-05, "loss": 3.3015, "step": 61100 }, { "epoch": 1.57, "learning_rate": 4.477405471872118e-05, "loss": 3.1981, "step": 61200 }, { "epoch": 1.57, "learning_rate": 4.476551559244458e-05, "loss": 3.1165, "step": 61300 }, { "epoch": 1.57, "learning_rate": 4.4756976466167984e-05, "loss": 3.1384, "step": 61400 }, { "epoch": 1.58, "learning_rate": 4.4748437339891385e-05, "loss": 3.2794, "step": 61500 }, { "epoch": 1.58, "learning_rate": 4.473989821361479e-05, "loss": 3.3021, "step": 61600 }, { "epoch": 1.58, "learning_rate": 4.473135908733819e-05, "loss": 3.4168, "step": 61700 }, { "epoch": 1.58, "learning_rate": 4.472281996106158e-05, "loss": 3.1338, "step": 61800 }, { "epoch": 1.59, "learning_rate": 4.471428083478499e-05, "loss": 3.3508, "step": 61900 }, { "epoch": 1.59, "learning_rate": 4.4705741708508386e-05, "loss": 3.1625, "step": 62000 }, { "epoch": 1.59, "learning_rate": 4.469720258223179e-05, "loss": 3.3888, "step": 62100 }, { "epoch": 1.59, "learning_rate": 4.468866345595519e-05, "loss": 3.2584, "step": 62200 }, { "epoch": 1.6, "learning_rate": 4.468012432967859e-05, "loss": 3.3002, "step": 62300 }, { "epoch": 1.6, "learning_rate": 4.4671585203401986e-05, "loss": 3.173, "step": 62400 }, { "epoch": 1.6, "learning_rate": 4.4663046077125394e-05, "loss": 3.0448, "step": 62500 }, { "epoch": 1.6, "learning_rate": 4.465450695084879e-05, "loss": 3.0071, "step": 62600 }, { "epoch": 1.61, "learning_rate": 4.464596782457219e-05, "loss": 3.3659, "step": 62700 }, { "epoch": 1.61, "learning_rate": 4.463742869829559e-05, "loss": 3.1854, "step": 62800 }, { "epoch": 1.61, "learning_rate": 4.4628889572018993e-05, "loss": 3.3044, "step": 62900 }, { "epoch": 1.61, "learning_rate": 4.4620350445742395e-05, "loss": 3.2739, "step": 63000 }, { "epoch": 1.62, "learning_rate": 4.4611811319465797e-05, "loss": 3.2855, "step": 63100 }, { "epoch": 1.62, "learning_rate": 4.460327219318919e-05, "loss": 3.0983, "step": 63200 }, { "epoch": 1.62, "learning_rate": 4.45947330669126e-05, "loss": 3.1033, "step": 63300 }, { "epoch": 1.62, "learning_rate": 4.4586193940635994e-05, "loss": 3.0957, "step": 63400 }, { "epoch": 1.63, "learning_rate": 4.4577654814359396e-05, "loss": 3.0914, "step": 63500 }, { "epoch": 1.63, "learning_rate": 4.45691156880828e-05, "loss": 3.1328, "step": 63600 }, { "epoch": 1.63, "learning_rate": 4.45605765618062e-05, "loss": 3.1602, "step": 63700 }, { "epoch": 1.63, "learning_rate": 4.45520374355296e-05, "loss": 3.1097, "step": 63800 }, { "epoch": 1.64, "learning_rate": 4.4543498309253e-05, "loss": 3.0696, "step": 63900 }, { "epoch": 1.64, "learning_rate": 4.45349591829764e-05, "loss": 3.0489, "step": 64000 }, { "epoch": 1.64, "learning_rate": 4.4526420056699805e-05, "loss": 3.027, "step": 64100 }, { "epoch": 1.64, "learning_rate": 4.45178809304232e-05, "loss": 3.2089, "step": 64200 }, { "epoch": 1.65, "learning_rate": 4.45093418041466e-05, "loss": 3.1203, "step": 64300 }, { "epoch": 1.65, "learning_rate": 4.450080267787e-05, "loss": 3.0265, "step": 64400 }, { "epoch": 1.65, "learning_rate": 4.4492263551593405e-05, "loss": 3.2565, "step": 64500 }, { "epoch": 1.65, "learning_rate": 4.4483724425316806e-05, "loss": 2.9759, "step": 64600 }, { "epoch": 1.66, "learning_rate": 4.447518529904021e-05, "loss": 3.3455, "step": 64700 }, { "epoch": 1.66, "learning_rate": 4.44666461727636e-05, "loss": 3.1068, "step": 64800 }, { "epoch": 1.66, "learning_rate": 4.445810704648701e-05, "loss": 3.1791, "step": 64900 }, { "epoch": 1.67, "learning_rate": 4.4449567920210406e-05, "loss": 3.1656, "step": 65000 }, { "epoch": 1.67, "learning_rate": 4.444102879393381e-05, "loss": 3.235, "step": 65100 }, { "epoch": 1.67, "learning_rate": 4.443248966765721e-05, "loss": 3.0505, "step": 65200 }, { "epoch": 1.67, "learning_rate": 4.442395054138061e-05, "loss": 3.2848, "step": 65300 }, { "epoch": 1.68, "learning_rate": 4.4415411415104005e-05, "loss": 3.0479, "step": 65400 }, { "epoch": 1.68, "learning_rate": 4.440687228882741e-05, "loss": 3.2758, "step": 65500 }, { "epoch": 1.68, "learning_rate": 4.439833316255081e-05, "loss": 3.0013, "step": 65600 }, { "epoch": 1.68, "learning_rate": 4.438979403627421e-05, "loss": 3.1324, "step": 65700 }, { "epoch": 1.69, "learning_rate": 4.438125490999761e-05, "loss": 3.0407, "step": 65800 }, { "epoch": 1.69, "learning_rate": 4.437271578372101e-05, "loss": 3.2393, "step": 65900 }, { "epoch": 1.69, "learning_rate": 4.4364176657444414e-05, "loss": 3.1361, "step": 66000 }, { "epoch": 1.69, "learning_rate": 4.4355637531167816e-05, "loss": 3.2417, "step": 66100 }, { "epoch": 1.7, "learning_rate": 4.434709840489121e-05, "loss": 3.2056, "step": 66200 }, { "epoch": 1.7, "learning_rate": 4.433855927861462e-05, "loss": 3.2976, "step": 66300 }, { "epoch": 1.7, "learning_rate": 4.4330020152338014e-05, "loss": 3.2442, "step": 66400 }, { "epoch": 1.7, "learning_rate": 4.4321481026061415e-05, "loss": 3.1343, "step": 66500 }, { "epoch": 1.71, "learning_rate": 4.431294189978482e-05, "loss": 3.213, "step": 66600 }, { "epoch": 1.71, "learning_rate": 4.430440277350822e-05, "loss": 3.1263, "step": 66700 }, { "epoch": 1.71, "learning_rate": 4.429586364723162e-05, "loss": 3.2514, "step": 66800 }, { "epoch": 1.71, "learning_rate": 4.428732452095502e-05, "loss": 3.1529, "step": 66900 }, { "epoch": 1.72, "learning_rate": 4.4278785394678416e-05, "loss": 3.1256, "step": 67000 }, { "epoch": 1.72, "learning_rate": 4.4270246268401825e-05, "loss": 3.3028, "step": 67100 }, { "epoch": 1.72, "learning_rate": 4.426170714212522e-05, "loss": 2.9583, "step": 67200 }, { "epoch": 1.72, "learning_rate": 4.425316801584862e-05, "loss": 3.3054, "step": 67300 }, { "epoch": 1.73, "learning_rate": 4.424462888957202e-05, "loss": 3.3126, "step": 67400 }, { "epoch": 1.73, "learning_rate": 4.4236089763295424e-05, "loss": 3.0507, "step": 67500 }, { "epoch": 1.73, "learning_rate": 4.422755063701882e-05, "loss": 3.1097, "step": 67600 }, { "epoch": 1.73, "learning_rate": 4.421901151074223e-05, "loss": 2.932, "step": 67700 }, { "epoch": 1.74, "learning_rate": 4.421047238446562e-05, "loss": 3.1756, "step": 67800 }, { "epoch": 1.74, "learning_rate": 4.4201933258189023e-05, "loss": 3.126, "step": 67900 }, { "epoch": 1.74, "learning_rate": 4.4193394131912425e-05, "loss": 3.154, "step": 68000 }, { "epoch": 1.74, "learning_rate": 4.4184855005635827e-05, "loss": 3.1391, "step": 68100 }, { "epoch": 1.75, "learning_rate": 4.417631587935923e-05, "loss": 3.2438, "step": 68200 }, { "epoch": 1.75, "learning_rate": 4.416777675308263e-05, "loss": 3.1666, "step": 68300 }, { "epoch": 1.75, "learning_rate": 4.4159237626806024e-05, "loss": 3.1894, "step": 68400 }, { "epoch": 1.75, "learning_rate": 4.415069850052943e-05, "loss": 3.1682, "step": 68500 }, { "epoch": 1.76, "learning_rate": 4.414215937425283e-05, "loss": 3.1575, "step": 68600 }, { "epoch": 1.76, "learning_rate": 4.413362024797623e-05, "loss": 2.9601, "step": 68700 }, { "epoch": 1.76, "learning_rate": 4.412508112169963e-05, "loss": 3.2788, "step": 68800 }, { "epoch": 1.77, "learning_rate": 4.411654199542303e-05, "loss": 3.3421, "step": 68900 }, { "epoch": 1.77, "learning_rate": 4.4108002869146434e-05, "loss": 3.0026, "step": 69000 }, { "epoch": 1.77, "learning_rate": 4.4099463742869835e-05, "loss": 3.2379, "step": 69100 }, { "epoch": 1.77, "learning_rate": 4.409092461659323e-05, "loss": 3.2624, "step": 69200 }, { "epoch": 1.78, "learning_rate": 4.408238549031664e-05, "loss": 3.0439, "step": 69300 }, { "epoch": 1.78, "learning_rate": 4.407384636404003e-05, "loss": 3.2517, "step": 69400 }, { "epoch": 1.78, "learning_rate": 4.4065307237763435e-05, "loss": 3.259, "step": 69500 }, { "epoch": 1.78, "learning_rate": 4.4056768111486836e-05, "loss": 3.1286, "step": 69600 }, { "epoch": 1.79, "learning_rate": 4.404822898521024e-05, "loss": 3.2362, "step": 69700 }, { "epoch": 1.79, "learning_rate": 4.403968985893364e-05, "loss": 3.1902, "step": 69800 }, { "epoch": 1.79, "learning_rate": 4.403115073265704e-05, "loss": 3.4516, "step": 69900 }, { "epoch": 1.79, "learning_rate": 4.4022611606380436e-05, "loss": 3.0569, "step": 70000 }, { "epoch": 1.8, "learning_rate": 4.4014072480103844e-05, "loss": 3.2138, "step": 70100 }, { "epoch": 1.8, "learning_rate": 4.400553335382724e-05, "loss": 3.1757, "step": 70200 }, { "epoch": 1.8, "learning_rate": 4.3996994227550633e-05, "loss": 3.2162, "step": 70300 }, { "epoch": 1.8, "learning_rate": 4.398845510127404e-05, "loss": 3.2975, "step": 70400 }, { "epoch": 1.81, "learning_rate": 4.3979915974997437e-05, "loss": 3.1081, "step": 70500 }, { "epoch": 1.81, "learning_rate": 4.397137684872084e-05, "loss": 3.1195, "step": 70600 }, { "epoch": 1.81, "learning_rate": 4.396283772244424e-05, "loss": 3.2665, "step": 70700 }, { "epoch": 1.81, "learning_rate": 4.395429859616764e-05, "loss": 3.125, "step": 70800 }, { "epoch": 1.82, "learning_rate": 4.394575946989104e-05, "loss": 3.2652, "step": 70900 }, { "epoch": 1.82, "learning_rate": 4.3937220343614444e-05, "loss": 3.0792, "step": 71000 }, { "epoch": 1.82, "learning_rate": 4.392868121733784e-05, "loss": 3.2215, "step": 71100 }, { "epoch": 1.82, "learning_rate": 4.392014209106125e-05, "loss": 2.9989, "step": 71200 }, { "epoch": 1.83, "learning_rate": 4.391160296478464e-05, "loss": 3.2054, "step": 71300 }, { "epoch": 1.83, "learning_rate": 4.3903063838508044e-05, "loss": 3.1994, "step": 71400 }, { "epoch": 1.83, "learning_rate": 4.3894524712231445e-05, "loss": 3.0047, "step": 71500 }, { "epoch": 1.83, "learning_rate": 4.388598558595485e-05, "loss": 2.9717, "step": 71600 }, { "epoch": 1.84, "learning_rate": 4.387744645967825e-05, "loss": 3.1778, "step": 71700 }, { "epoch": 1.84, "learning_rate": 4.386890733340165e-05, "loss": 3.1838, "step": 71800 }, { "epoch": 1.84, "learning_rate": 4.3860368207125045e-05, "loss": 3.0509, "step": 71900 }, { "epoch": 1.84, "learning_rate": 4.385182908084845e-05, "loss": 3.0945, "step": 72000 }, { "epoch": 1.85, "learning_rate": 4.384328995457185e-05, "loss": 3.2435, "step": 72100 }, { "epoch": 1.85, "learning_rate": 4.383475082829525e-05, "loss": 3.134, "step": 72200 }, { "epoch": 1.85, "learning_rate": 4.382621170201865e-05, "loss": 3.4882, "step": 72300 }, { "epoch": 1.85, "learning_rate": 4.381767257574205e-05, "loss": 3.4011, "step": 72400 }, { "epoch": 1.86, "learning_rate": 4.380913344946545e-05, "loss": 3.1124, "step": 72500 }, { "epoch": 1.86, "learning_rate": 4.3800594323188856e-05, "loss": 3.2966, "step": 72600 }, { "epoch": 1.86, "learning_rate": 4.379205519691225e-05, "loss": 3.3746, "step": 72700 }, { "epoch": 1.86, "learning_rate": 4.378351607063565e-05, "loss": 3.085, "step": 72800 }, { "epoch": 1.87, "learning_rate": 4.3774976944359053e-05, "loss": 3.1161, "step": 72900 }, { "epoch": 1.87, "learning_rate": 4.3766437818082455e-05, "loss": 3.0355, "step": 73000 }, { "epoch": 1.87, "learning_rate": 4.3757898691805857e-05, "loss": 3.1345, "step": 73100 }, { "epoch": 1.88, "learning_rate": 4.374935956552926e-05, "loss": 3.2653, "step": 73200 }, { "epoch": 1.88, "learning_rate": 4.374082043925265e-05, "loss": 3.0472, "step": 73300 }, { "epoch": 1.88, "learning_rate": 4.373228131297606e-05, "loss": 3.0984, "step": 73400 }, { "epoch": 1.88, "learning_rate": 4.3723742186699456e-05, "loss": 3.2532, "step": 73500 }, { "epoch": 1.89, "learning_rate": 4.371520306042286e-05, "loss": 3.2685, "step": 73600 }, { "epoch": 1.89, "learning_rate": 4.370666393414626e-05, "loss": 3.2064, "step": 73700 }, { "epoch": 1.89, "learning_rate": 4.369812480786966e-05, "loss": 2.9865, "step": 73800 }, { "epoch": 1.89, "learning_rate": 4.368958568159306e-05, "loss": 3.0411, "step": 73900 }, { "epoch": 1.9, "learning_rate": 4.3681046555316464e-05, "loss": 3.0619, "step": 74000 }, { "epoch": 1.9, "learning_rate": 4.367250742903986e-05, "loss": 3.271, "step": 74100 }, { "epoch": 1.9, "learning_rate": 4.366396830276327e-05, "loss": 3.2532, "step": 74200 }, { "epoch": 1.9, "learning_rate": 4.365542917648666e-05, "loss": 3.18, "step": 74300 }, { "epoch": 1.91, "learning_rate": 4.364689005021006e-05, "loss": 3.3295, "step": 74400 }, { "epoch": 1.91, "learning_rate": 4.3638350923933465e-05, "loss": 3.1967, "step": 74500 }, { "epoch": 1.91, "learning_rate": 4.3629811797656866e-05, "loss": 3.2007, "step": 74600 }, { "epoch": 1.91, "learning_rate": 4.362127267138027e-05, "loss": 3.2682, "step": 74700 }, { "epoch": 1.92, "learning_rate": 4.361273354510367e-05, "loss": 2.9575, "step": 74800 }, { "epoch": 1.92, "learning_rate": 4.3604194418827064e-05, "loss": 3.2221, "step": 74900 }, { "epoch": 1.92, "learning_rate": 4.359565529255047e-05, "loss": 2.9996, "step": 75000 }, { "epoch": 1.92, "learning_rate": 4.358711616627387e-05, "loss": 3.0046, "step": 75100 }, { "epoch": 1.93, "learning_rate": 4.357857703999727e-05, "loss": 3.1096, "step": 75200 }, { "epoch": 1.93, "learning_rate": 4.357003791372067e-05, "loss": 3.3285, "step": 75300 }, { "epoch": 1.93, "learning_rate": 4.356149878744407e-05, "loss": 3.1116, "step": 75400 }, { "epoch": 1.93, "learning_rate": 4.3552959661167467e-05, "loss": 3.2307, "step": 75500 }, { "epoch": 1.94, "learning_rate": 4.3544420534890875e-05, "loss": 3.3776, "step": 75600 }, { "epoch": 1.94, "learning_rate": 4.353588140861427e-05, "loss": 3.2924, "step": 75700 }, { "epoch": 1.94, "learning_rate": 4.352734228233767e-05, "loss": 3.117, "step": 75800 }, { "epoch": 1.94, "learning_rate": 4.351880315606107e-05, "loss": 3.2263, "step": 75900 }, { "epoch": 1.95, "learning_rate": 4.3510264029784474e-05, "loss": 3.0552, "step": 76000 }, { "epoch": 1.95, "learning_rate": 4.3501724903507876e-05, "loss": 3.142, "step": 76100 }, { "epoch": 1.95, "learning_rate": 4.349318577723128e-05, "loss": 3.3654, "step": 76200 }, { "epoch": 1.95, "learning_rate": 4.348464665095467e-05, "loss": 3.1482, "step": 76300 }, { "epoch": 1.96, "learning_rate": 4.347610752467808e-05, "loss": 3.1302, "step": 76400 }, { "epoch": 1.96, "learning_rate": 4.3467568398401475e-05, "loss": 3.2023, "step": 76500 }, { "epoch": 1.96, "learning_rate": 4.345902927212488e-05, "loss": 3.0512, "step": 76600 }, { "epoch": 1.96, "learning_rate": 4.345049014584828e-05, "loss": 3.2809, "step": 76700 }, { "epoch": 1.97, "learning_rate": 4.344195101957168e-05, "loss": 3.1384, "step": 76800 }, { "epoch": 1.97, "learning_rate": 4.343341189329508e-05, "loss": 3.2299, "step": 76900 }, { "epoch": 1.97, "learning_rate": 4.342487276701848e-05, "loss": 3.0812, "step": 77000 }, { "epoch": 1.98, "learning_rate": 4.341633364074188e-05, "loss": 3.2408, "step": 77100 }, { "epoch": 1.98, "learning_rate": 4.3407794514465286e-05, "loss": 3.1539, "step": 77200 }, { "epoch": 1.98, "learning_rate": 4.339925538818868e-05, "loss": 3.1098, "step": 77300 }, { "epoch": 1.98, "learning_rate": 4.339071626191208e-05, "loss": 3.066, "step": 77400 }, { "epoch": 1.99, "learning_rate": 4.3382177135635484e-05, "loss": 2.984, "step": 77500 }, { "epoch": 1.99, "learning_rate": 4.3373638009358886e-05, "loss": 3.2029, "step": 77600 }, { "epoch": 1.99, "learning_rate": 4.336509888308228e-05, "loss": 3.1193, "step": 77700 }, { "epoch": 1.99, "learning_rate": 4.335655975680569e-05, "loss": 3.193, "step": 77800 }, { "epoch": 2.0, "learning_rate": 4.3348020630529083e-05, "loss": 3.2723, "step": 77900 }, { "epoch": 2.0, "learning_rate": 4.3339481504252485e-05, "loss": 3.3588, "step": 78000 }, { "epoch": 2.0, "learning_rate": 4.3330942377975886e-05, "loss": 3.0575, "step": 78100 }, { "epoch": 2.0, "learning_rate": 4.332240325169929e-05, "loss": 2.9386, "step": 78200 }, { "epoch": 2.01, "learning_rate": 4.331386412542269e-05, "loss": 3.1232, "step": 78300 }, { "epoch": 2.01, "learning_rate": 4.330532499914609e-05, "loss": 2.9355, "step": 78400 }, { "epoch": 2.01, "learning_rate": 4.3296785872869486e-05, "loss": 3.0254, "step": 78500 }, { "epoch": 2.01, "learning_rate": 4.3288246746592894e-05, "loss": 3.2184, "step": 78600 }, { "epoch": 2.02, "learning_rate": 4.327970762031629e-05, "loss": 3.0451, "step": 78700 }, { "epoch": 2.02, "learning_rate": 4.327116849403969e-05, "loss": 3.085, "step": 78800 }, { "epoch": 2.02, "learning_rate": 4.326262936776309e-05, "loss": 2.9988, "step": 78900 }, { "epoch": 2.02, "learning_rate": 4.3254090241486494e-05, "loss": 3.0037, "step": 79000 }, { "epoch": 2.03, "learning_rate": 4.3245551115209895e-05, "loss": 3.1751, "step": 79100 }, { "epoch": 2.03, "learning_rate": 4.32370119889333e-05, "loss": 3.0905, "step": 79200 }, { "epoch": 2.03, "learning_rate": 4.322847286265669e-05, "loss": 3.0897, "step": 79300 }, { "epoch": 2.03, "learning_rate": 4.32199337363801e-05, "loss": 3.1822, "step": 79400 }, { "epoch": 2.04, "learning_rate": 4.3211394610103495e-05, "loss": 3.2876, "step": 79500 }, { "epoch": 2.04, "learning_rate": 4.3202855483826896e-05, "loss": 3.1157, "step": 79600 }, { "epoch": 2.04, "learning_rate": 4.31943163575503e-05, "loss": 3.1676, "step": 79700 }, { "epoch": 2.04, "learning_rate": 4.31857772312737e-05, "loss": 3.2256, "step": 79800 }, { "epoch": 2.05, "learning_rate": 4.31772381049971e-05, "loss": 3.162, "step": 79900 }, { "epoch": 2.05, "learning_rate": 4.31686989787205e-05, "loss": 3.0349, "step": 80000 }, { "epoch": 2.05, "learning_rate": 4.31601598524439e-05, "loss": 3.0997, "step": 80100 }, { "epoch": 2.05, "learning_rate": 4.3151620726167305e-05, "loss": 3.1964, "step": 80200 }, { "epoch": 2.06, "learning_rate": 4.31430815998907e-05, "loss": 3.1396, "step": 80300 }, { "epoch": 2.06, "learning_rate": 4.31345424736141e-05, "loss": 3.1143, "step": 80400 }, { "epoch": 2.06, "learning_rate": 4.31260033473375e-05, "loss": 3.0063, "step": 80500 }, { "epoch": 2.06, "learning_rate": 4.3117464221060905e-05, "loss": 3.2515, "step": 80600 }, { "epoch": 2.07, "learning_rate": 4.31089250947843e-05, "loss": 3.1233, "step": 80700 }, { "epoch": 2.07, "learning_rate": 4.310038596850771e-05, "loss": 2.9936, "step": 80800 }, { "epoch": 2.07, "learning_rate": 4.30918468422311e-05, "loss": 2.9814, "step": 80900 }, { "epoch": 2.08, "learning_rate": 4.3083307715954504e-05, "loss": 3.2333, "step": 81000 }, { "epoch": 2.08, "learning_rate": 4.3074768589677906e-05, "loss": 3.2694, "step": 81100 }, { "epoch": 2.08, "learning_rate": 4.306622946340131e-05, "loss": 3.0663, "step": 81200 }, { "epoch": 2.08, "learning_rate": 4.305769033712471e-05, "loss": 3.4595, "step": 81300 }, { "epoch": 2.09, "learning_rate": 4.304915121084811e-05, "loss": 3.2051, "step": 81400 }, { "epoch": 2.09, "learning_rate": 4.3040612084571505e-05, "loss": 3.2371, "step": 81500 }, { "epoch": 2.09, "learning_rate": 4.3032072958294914e-05, "loss": 3.1418, "step": 81600 }, { "epoch": 2.09, "learning_rate": 4.302353383201831e-05, "loss": 3.3268, "step": 81700 }, { "epoch": 2.1, "learning_rate": 4.301499470574171e-05, "loss": 3.1883, "step": 81800 }, { "epoch": 2.1, "learning_rate": 4.300645557946511e-05, "loss": 3.2198, "step": 81900 }, { "epoch": 2.1, "learning_rate": 4.299791645318851e-05, "loss": 3.3429, "step": 82000 }, { "epoch": 2.1, "learning_rate": 4.2989377326911915e-05, "loss": 3.0335, "step": 82100 }, { "epoch": 2.11, "learning_rate": 4.2980838200635316e-05, "loss": 3.1086, "step": 82200 }, { "epoch": 2.11, "learning_rate": 4.297229907435871e-05, "loss": 3.1151, "step": 82300 }, { "epoch": 2.11, "learning_rate": 4.296375994808212e-05, "loss": 3.1687, "step": 82400 }, { "epoch": 2.11, "learning_rate": 4.2955220821805514e-05, "loss": 2.9901, "step": 82500 }, { "epoch": 2.12, "learning_rate": 4.2946681695528916e-05, "loss": 3.3057, "step": 82600 }, { "epoch": 2.12, "learning_rate": 4.293814256925232e-05, "loss": 3.1803, "step": 82700 }, { "epoch": 2.12, "learning_rate": 4.292960344297572e-05, "loss": 3.0191, "step": 82800 }, { "epoch": 2.12, "learning_rate": 4.292106431669911e-05, "loss": 3.1051, "step": 82900 }, { "epoch": 2.13, "learning_rate": 4.291252519042252e-05, "loss": 3.2292, "step": 83000 }, { "epoch": 2.13, "learning_rate": 4.2903986064145916e-05, "loss": 3.0433, "step": 83100 }, { "epoch": 2.13, "learning_rate": 4.289544693786932e-05, "loss": 3.1835, "step": 83200 }, { "epoch": 2.13, "learning_rate": 4.288690781159272e-05, "loss": 3.3649, "step": 83300 }, { "epoch": 2.14, "learning_rate": 4.287836868531612e-05, "loss": 3.0549, "step": 83400 }, { "epoch": 2.14, "learning_rate": 4.286982955903952e-05, "loss": 3.159, "step": 83500 }, { "epoch": 2.14, "learning_rate": 4.2861290432762924e-05, "loss": 3.0583, "step": 83600 }, { "epoch": 2.14, "learning_rate": 4.285275130648632e-05, "loss": 3.1267, "step": 83700 }, { "epoch": 2.15, "learning_rate": 4.284421218020973e-05, "loss": 3.1198, "step": 83800 }, { "epoch": 2.15, "learning_rate": 4.283567305393312e-05, "loss": 3.261, "step": 83900 }, { "epoch": 2.15, "learning_rate": 4.2827133927656524e-05, "loss": 2.9281, "step": 84000 }, { "epoch": 2.15, "learning_rate": 4.2818594801379925e-05, "loss": 3.0939, "step": 84100 }, { "epoch": 2.16, "learning_rate": 4.281005567510333e-05, "loss": 3.0363, "step": 84200 }, { "epoch": 2.16, "learning_rate": 4.280151654882673e-05, "loss": 3.068, "step": 84300 }, { "epoch": 2.16, "learning_rate": 4.279297742255013e-05, "loss": 3.129, "step": 84400 }, { "epoch": 2.16, "learning_rate": 4.2784438296273525e-05, "loss": 3.2133, "step": 84500 }, { "epoch": 2.17, "learning_rate": 4.277589916999693e-05, "loss": 3.2536, "step": 84600 }, { "epoch": 2.17, "learning_rate": 4.276736004372033e-05, "loss": 2.9967, "step": 84700 }, { "epoch": 2.17, "learning_rate": 4.275882091744373e-05, "loss": 2.9866, "step": 84800 }, { "epoch": 2.17, "learning_rate": 4.275028179116713e-05, "loss": 3.1255, "step": 84900 }, { "epoch": 2.18, "learning_rate": 4.274174266489053e-05, "loss": 2.9926, "step": 85000 }, { "epoch": 2.18, "learning_rate": 4.2733203538613934e-05, "loss": 3.0661, "step": 85100 }, { "epoch": 2.18, "learning_rate": 4.2724664412337335e-05, "loss": 3.1502, "step": 85200 }, { "epoch": 2.19, "learning_rate": 4.271612528606073e-05, "loss": 2.8778, "step": 85300 }, { "epoch": 2.19, "learning_rate": 4.270758615978414e-05, "loss": 3.2147, "step": 85400 }, { "epoch": 2.19, "learning_rate": 4.269904703350753e-05, "loss": 2.9854, "step": 85500 }, { "epoch": 2.19, "learning_rate": 4.2690507907230935e-05, "loss": 3.1181, "step": 85600 }, { "epoch": 2.2, "learning_rate": 4.2681968780954336e-05, "loss": 2.9929, "step": 85700 }, { "epoch": 2.2, "learning_rate": 4.267342965467774e-05, "loss": 3.1671, "step": 85800 }, { "epoch": 2.2, "learning_rate": 4.266489052840113e-05, "loss": 3.0857, "step": 85900 }, { "epoch": 2.2, "learning_rate": 4.265635140212454e-05, "loss": 3.1893, "step": 86000 }, { "epoch": 2.21, "learning_rate": 4.2647812275847936e-05, "loss": 3.1835, "step": 86100 }, { "epoch": 2.21, "learning_rate": 4.263927314957134e-05, "loss": 3.3165, "step": 86200 }, { "epoch": 2.21, "learning_rate": 4.263073402329474e-05, "loss": 3.0212, "step": 86300 }, { "epoch": 2.21, "learning_rate": 4.262219489701814e-05, "loss": 3.0893, "step": 86400 }, { "epoch": 2.22, "learning_rate": 4.261365577074154e-05, "loss": 3.0834, "step": 86500 }, { "epoch": 2.22, "learning_rate": 4.2605116644464944e-05, "loss": 3.0804, "step": 86600 }, { "epoch": 2.22, "learning_rate": 4.259657751818834e-05, "loss": 3.133, "step": 86700 }, { "epoch": 2.22, "learning_rate": 4.258803839191175e-05, "loss": 3.3209, "step": 86800 }, { "epoch": 2.23, "learning_rate": 4.257949926563514e-05, "loss": 3.1159, "step": 86900 }, { "epoch": 2.23, "learning_rate": 4.257096013935854e-05, "loss": 3.0212, "step": 87000 }, { "epoch": 2.23, "learning_rate": 4.2562421013081945e-05, "loss": 3.0374, "step": 87100 }, { "epoch": 2.23, "learning_rate": 4.2553881886805346e-05, "loss": 2.8671, "step": 87200 }, { "epoch": 2.24, "learning_rate": 4.254534276052875e-05, "loss": 3.211, "step": 87300 }, { "epoch": 2.24, "learning_rate": 4.253680363425215e-05, "loss": 3.2994, "step": 87400 }, { "epoch": 2.24, "learning_rate": 4.2528264507975544e-05, "loss": 3.1808, "step": 87500 }, { "epoch": 2.24, "learning_rate": 4.251972538169895e-05, "loss": 3.0728, "step": 87600 }, { "epoch": 2.25, "learning_rate": 4.251118625542235e-05, "loss": 3.1494, "step": 87700 }, { "epoch": 2.25, "learning_rate": 4.250264712914574e-05, "loss": 3.0919, "step": 87800 }, { "epoch": 2.25, "learning_rate": 4.249410800286915e-05, "loss": 3.1287, "step": 87900 }, { "epoch": 2.25, "learning_rate": 4.2485568876592545e-05, "loss": 3.0724, "step": 88000 }, { "epoch": 2.26, "learning_rate": 4.2477029750315946e-05, "loss": 3.1844, "step": 88100 }, { "epoch": 2.26, "learning_rate": 4.246849062403935e-05, "loss": 2.9444, "step": 88200 }, { "epoch": 2.26, "learning_rate": 4.245995149776275e-05, "loss": 3.2252, "step": 88300 }, { "epoch": 2.26, "learning_rate": 4.245141237148615e-05, "loss": 2.9772, "step": 88400 }, { "epoch": 2.27, "learning_rate": 4.244287324520955e-05, "loss": 3.1618, "step": 88500 }, { "epoch": 2.27, "learning_rate": 4.243433411893295e-05, "loss": 3.2035, "step": 88600 }, { "epoch": 2.27, "learning_rate": 4.2425794992656356e-05, "loss": 3.03, "step": 88700 }, { "epoch": 2.27, "learning_rate": 4.241725586637975e-05, "loss": 3.0816, "step": 88800 }, { "epoch": 2.28, "learning_rate": 4.240871674010315e-05, "loss": 3.0276, "step": 88900 }, { "epoch": 2.28, "learning_rate": 4.2400177613826554e-05, "loss": 3.1817, "step": 89000 }, { "epoch": 2.28, "learning_rate": 4.2391638487549955e-05, "loss": 2.8854, "step": 89100 }, { "epoch": 2.29, "learning_rate": 4.238309936127336e-05, "loss": 2.99, "step": 89200 }, { "epoch": 2.29, "learning_rate": 4.237456023499676e-05, "loss": 3.2029, "step": 89300 }, { "epoch": 2.29, "learning_rate": 4.236602110872015e-05, "loss": 3.1559, "step": 89400 }, { "epoch": 2.29, "learning_rate": 4.235748198244356e-05, "loss": 3.1818, "step": 89500 }, { "epoch": 2.3, "learning_rate": 4.2348942856166956e-05, "loss": 3.1181, "step": 89600 }, { "epoch": 2.3, "learning_rate": 4.234040372989036e-05, "loss": 3.1893, "step": 89700 }, { "epoch": 2.3, "learning_rate": 4.233186460361376e-05, "loss": 3.1009, "step": 89800 }, { "epoch": 2.3, "learning_rate": 4.232332547733716e-05, "loss": 3.2049, "step": 89900 }, { "epoch": 2.31, "learning_rate": 4.231478635106056e-05, "loss": 3.0272, "step": 90000 }, { "epoch": 2.31, "learning_rate": 4.2306247224783964e-05, "loss": 3.2394, "step": 90100 }, { "epoch": 2.31, "learning_rate": 4.229770809850736e-05, "loss": 2.8205, "step": 90200 }, { "epoch": 2.31, "learning_rate": 4.228916897223077e-05, "loss": 3.2412, "step": 90300 }, { "epoch": 2.32, "learning_rate": 4.228062984595416e-05, "loss": 2.9937, "step": 90400 }, { "epoch": 2.32, "learning_rate": 4.227209071967756e-05, "loss": 3.2042, "step": 90500 }, { "epoch": 2.32, "learning_rate": 4.2263551593400965e-05, "loss": 3.0972, "step": 90600 }, { "epoch": 2.32, "learning_rate": 4.2255012467124366e-05, "loss": 3.2973, "step": 90700 }, { "epoch": 2.33, "learning_rate": 4.224647334084776e-05, "loss": 3.0852, "step": 90800 }, { "epoch": 2.33, "learning_rate": 4.223793421457117e-05, "loss": 3.0435, "step": 90900 }, { "epoch": 2.33, "learning_rate": 4.2229395088294564e-05, "loss": 3.1896, "step": 91000 }, { "epoch": 2.33, "learning_rate": 4.2220855962017966e-05, "loss": 3.2066, "step": 91100 }, { "epoch": 2.34, "learning_rate": 4.221231683574137e-05, "loss": 3.0411, "step": 91200 }, { "epoch": 2.34, "learning_rate": 4.220377770946477e-05, "loss": 3.1446, "step": 91300 }, { "epoch": 2.34, "learning_rate": 4.219523858318817e-05, "loss": 2.9866, "step": 91400 }, { "epoch": 2.34, "learning_rate": 4.218669945691157e-05, "loss": 2.9629, "step": 91500 }, { "epoch": 2.35, "learning_rate": 4.217816033063497e-05, "loss": 3.0371, "step": 91600 }, { "epoch": 2.35, "learning_rate": 4.2169621204358375e-05, "loss": 3.1809, "step": 91700 }, { "epoch": 2.35, "learning_rate": 4.216108207808177e-05, "loss": 3.2357, "step": 91800 }, { "epoch": 2.35, "learning_rate": 4.215254295180517e-05, "loss": 3.298, "step": 91900 }, { "epoch": 2.36, "learning_rate": 4.214400382552857e-05, "loss": 3.0594, "step": 92000 }, { "epoch": 2.36, "learning_rate": 4.2135464699251974e-05, "loss": 2.8645, "step": 92100 }, { "epoch": 2.36, "learning_rate": 4.2126925572975376e-05, "loss": 3.1693, "step": 92200 }, { "epoch": 2.36, "learning_rate": 4.211838644669878e-05, "loss": 3.0464, "step": 92300 }, { "epoch": 2.37, "learning_rate": 4.210984732042217e-05, "loss": 3.0673, "step": 92400 }, { "epoch": 2.37, "learning_rate": 4.210130819414558e-05, "loss": 2.959, "step": 92500 }, { "epoch": 2.37, "learning_rate": 4.2092769067868975e-05, "loss": 3.0101, "step": 92600 }, { "epoch": 2.37, "learning_rate": 4.208422994159238e-05, "loss": 3.13, "step": 92700 }, { "epoch": 2.38, "learning_rate": 4.207569081531578e-05, "loss": 3.0754, "step": 92800 }, { "epoch": 2.38, "learning_rate": 4.206715168903918e-05, "loss": 3.0374, "step": 92900 }, { "epoch": 2.38, "learning_rate": 4.2058612562762575e-05, "loss": 3.2083, "step": 93000 }, { "epoch": 2.38, "learning_rate": 4.205007343648598e-05, "loss": 2.9163, "step": 93100 }, { "epoch": 2.39, "learning_rate": 4.204153431020938e-05, "loss": 3.0933, "step": 93200 }, { "epoch": 2.39, "learning_rate": 4.2032995183932786e-05, "loss": 3.125, "step": 93300 }, { "epoch": 2.39, "learning_rate": 4.202445605765618e-05, "loss": 3.1055, "step": 93400 }, { "epoch": 2.4, "learning_rate": 4.201591693137958e-05, "loss": 3.0669, "step": 93500 }, { "epoch": 2.4, "learning_rate": 4.2007377805102984e-05, "loss": 3.0477, "step": 93600 }, { "epoch": 2.4, "learning_rate": 4.1998838678826386e-05, "loss": 3.1347, "step": 93700 }, { "epoch": 2.4, "learning_rate": 4.199029955254978e-05, "loss": 3.1189, "step": 93800 }, { "epoch": 2.41, "learning_rate": 4.198176042627319e-05, "loss": 3.2261, "step": 93900 }, { "epoch": 2.41, "learning_rate": 4.1973221299996584e-05, "loss": 3.0995, "step": 94000 }, { "epoch": 2.41, "learning_rate": 4.1964682173719985e-05, "loss": 3.0714, "step": 94100 }, { "epoch": 2.41, "learning_rate": 4.195614304744339e-05, "loss": 3.2601, "step": 94200 }, { "epoch": 2.42, "learning_rate": 4.194760392116679e-05, "loss": 3.1525, "step": 94300 }, { "epoch": 2.42, "learning_rate": 4.193906479489019e-05, "loss": 3.0394, "step": 94400 }, { "epoch": 2.42, "learning_rate": 4.193052566861359e-05, "loss": 3.2386, "step": 94500 }, { "epoch": 2.42, "learning_rate": 4.1921986542336986e-05, "loss": 3.239, "step": 94600 }, { "epoch": 2.43, "learning_rate": 4.1913447416060394e-05, "loss": 3.0516, "step": 94700 }, { "epoch": 2.43, "learning_rate": 4.190490828978379e-05, "loss": 3.0337, "step": 94800 }, { "epoch": 2.43, "learning_rate": 4.189636916350719e-05, "loss": 2.9627, "step": 94900 }, { "epoch": 2.43, "learning_rate": 4.188783003723059e-05, "loss": 3.0803, "step": 95000 }, { "epoch": 2.44, "learning_rate": 4.1879290910953994e-05, "loss": 2.9824, "step": 95100 }, { "epoch": 2.44, "learning_rate": 4.1870751784677395e-05, "loss": 3.1304, "step": 95200 }, { "epoch": 2.44, "learning_rate": 4.18622126584008e-05, "loss": 3.0516, "step": 95300 }, { "epoch": 2.44, "learning_rate": 4.185367353212419e-05, "loss": 2.9625, "step": 95400 }, { "epoch": 2.45, "learning_rate": 4.18451344058476e-05, "loss": 2.9388, "step": 95500 }, { "epoch": 2.45, "learning_rate": 4.1836595279570995e-05, "loss": 3.1363, "step": 95600 }, { "epoch": 2.45, "learning_rate": 4.1828056153294396e-05, "loss": 2.9823, "step": 95700 }, { "epoch": 2.45, "learning_rate": 4.18195170270178e-05, "loss": 3.108, "step": 95800 }, { "epoch": 2.46, "learning_rate": 4.18109779007412e-05, "loss": 3.1888, "step": 95900 }, { "epoch": 2.46, "learning_rate": 4.1802438774464594e-05, "loss": 3.1066, "step": 96000 }, { "epoch": 2.46, "learning_rate": 4.1793899648188e-05, "loss": 2.9461, "step": 96100 }, { "epoch": 2.46, "learning_rate": 4.17853605219114e-05, "loss": 3.0784, "step": 96200 }, { "epoch": 2.47, "learning_rate": 4.17768213956348e-05, "loss": 3.0233, "step": 96300 }, { "epoch": 2.47, "learning_rate": 4.17682822693582e-05, "loss": 3.0728, "step": 96400 }, { "epoch": 2.47, "learning_rate": 4.17597431430816e-05, "loss": 3.0897, "step": 96500 }, { "epoch": 2.47, "learning_rate": 4.1751204016805004e-05, "loss": 3.0342, "step": 96600 }, { "epoch": 2.48, "learning_rate": 4.1742664890528405e-05, "loss": 2.9922, "step": 96700 }, { "epoch": 2.48, "learning_rate": 4.17341257642518e-05, "loss": 3.0786, "step": 96800 }, { "epoch": 2.48, "learning_rate": 4.172558663797521e-05, "loss": 2.9265, "step": 96900 }, { "epoch": 2.48, "learning_rate": 4.17170475116986e-05, "loss": 3.109, "step": 97000 }, { "epoch": 2.49, "learning_rate": 4.1708508385422004e-05, "loss": 3.229, "step": 97100 }, { "epoch": 2.49, "learning_rate": 4.1699969259145406e-05, "loss": 2.9595, "step": 97200 }, { "epoch": 2.49, "learning_rate": 4.169143013286881e-05, "loss": 2.9587, "step": 97300 }, { "epoch": 2.5, "learning_rate": 4.168289100659221e-05, "loss": 3.0812, "step": 97400 }, { "epoch": 2.5, "learning_rate": 4.167435188031561e-05, "loss": 3.1367, "step": 97500 }, { "epoch": 2.5, "learning_rate": 4.1665812754039005e-05, "loss": 3.0375, "step": 97600 }, { "epoch": 2.5, "learning_rate": 4.1657273627762414e-05, "loss": 3.1682, "step": 97700 }, { "epoch": 2.51, "learning_rate": 4.164873450148581e-05, "loss": 3.0156, "step": 97800 }, { "epoch": 2.51, "learning_rate": 4.164019537520921e-05, "loss": 3.1416, "step": 97900 }, { "epoch": 2.51, "learning_rate": 4.163165624893261e-05, "loss": 3.1544, "step": 98000 }, { "epoch": 2.51, "learning_rate": 4.162311712265601e-05, "loss": 3.1956, "step": 98100 }, { "epoch": 2.52, "learning_rate": 4.1614577996379415e-05, "loss": 3.0813, "step": 98200 }, { "epoch": 2.52, "learning_rate": 4.1606038870102816e-05, "loss": 3.0339, "step": 98300 }, { "epoch": 2.52, "learning_rate": 4.159749974382621e-05, "loss": 3.0538, "step": 98400 }, { "epoch": 2.52, "learning_rate": 4.158896061754962e-05, "loss": 3.152, "step": 98500 }, { "epoch": 2.53, "learning_rate": 4.1580421491273014e-05, "loss": 3.1042, "step": 98600 }, { "epoch": 2.53, "learning_rate": 4.1571882364996416e-05, "loss": 2.8913, "step": 98700 }, { "epoch": 2.53, "learning_rate": 4.156334323871982e-05, "loss": 3.0919, "step": 98800 }, { "epoch": 2.53, "learning_rate": 4.155480411244322e-05, "loss": 3.0475, "step": 98900 }, { "epoch": 2.54, "learning_rate": 4.1546264986166614e-05, "loss": 2.9587, "step": 99000 }, { "epoch": 2.54, "learning_rate": 4.153772585989002e-05, "loss": 3.0237, "step": 99100 }, { "epoch": 2.54, "learning_rate": 4.152918673361342e-05, "loss": 2.9342, "step": 99200 }, { "epoch": 2.54, "learning_rate": 4.152064760733682e-05, "loss": 3.0741, "step": 99300 }, { "epoch": 2.55, "learning_rate": 4.151210848106022e-05, "loss": 3.3034, "step": 99400 }, { "epoch": 2.55, "learning_rate": 4.150356935478362e-05, "loss": 2.997, "step": 99500 }, { "epoch": 2.55, "learning_rate": 4.149503022850702e-05, "loss": 3.1982, "step": 99600 }, { "epoch": 2.55, "learning_rate": 4.1486491102230424e-05, "loss": 3.0697, "step": 99700 }, { "epoch": 2.56, "learning_rate": 4.147795197595382e-05, "loss": 3.111, "step": 99800 }, { "epoch": 2.56, "learning_rate": 4.146941284967723e-05, "loss": 3.1279, "step": 99900 }, { "epoch": 2.56, "learning_rate": 4.146087372340062e-05, "loss": 3.1502, "step": 100000 }, { "epoch": 2.56, "learning_rate": 4.1452334597124024e-05, "loss": 2.8996, "step": 100100 }, { "epoch": 2.57, "learning_rate": 4.1443795470847425e-05, "loss": 3.2088, "step": 100200 }, { "epoch": 2.57, "learning_rate": 4.143525634457083e-05, "loss": 3.1219, "step": 100300 }, { "epoch": 2.57, "learning_rate": 4.142671721829423e-05, "loss": 3.0994, "step": 100400 }, { "epoch": 2.57, "learning_rate": 4.141817809201763e-05, "loss": 3.076, "step": 100500 }, { "epoch": 2.58, "learning_rate": 4.1409638965741025e-05, "loss": 3.0914, "step": 100600 }, { "epoch": 2.58, "learning_rate": 4.140109983946443e-05, "loss": 2.9685, "step": 100700 }, { "epoch": 2.58, "learning_rate": 4.139256071318783e-05, "loss": 3.3519, "step": 100800 }, { "epoch": 2.58, "learning_rate": 4.138402158691123e-05, "loss": 3.0838, "step": 100900 }, { "epoch": 2.59, "learning_rate": 4.137548246063463e-05, "loss": 3.0116, "step": 101000 }, { "epoch": 2.59, "learning_rate": 4.136694333435803e-05, "loss": 2.9562, "step": 101100 }, { "epoch": 2.59, "learning_rate": 4.135840420808143e-05, "loss": 3.1637, "step": 101200 }, { "epoch": 2.6, "learning_rate": 4.1349865081804836e-05, "loss": 3.1111, "step": 101300 }, { "epoch": 2.6, "learning_rate": 4.134132595552823e-05, "loss": 2.9523, "step": 101400 }, { "epoch": 2.6, "learning_rate": 4.133278682925163e-05, "loss": 2.9791, "step": 101500 }, { "epoch": 2.6, "learning_rate": 4.1324247702975033e-05, "loss": 3.0496, "step": 101600 }, { "epoch": 2.61, "learning_rate": 4.1315708576698435e-05, "loss": 3.0005, "step": 101700 }, { "epoch": 2.61, "learning_rate": 4.1307169450421837e-05, "loss": 3.242, "step": 101800 }, { "epoch": 2.61, "learning_rate": 4.129863032414524e-05, "loss": 2.9777, "step": 101900 }, { "epoch": 2.61, "learning_rate": 4.129009119786863e-05, "loss": 3.0187, "step": 102000 }, { "epoch": 2.62, "learning_rate": 4.128155207159204e-05, "loss": 2.9164, "step": 102100 }, { "epoch": 2.62, "learning_rate": 4.1273012945315436e-05, "loss": 3.1537, "step": 102200 }, { "epoch": 2.62, "learning_rate": 4.126447381903884e-05, "loss": 3.0327, "step": 102300 }, { "epoch": 2.62, "learning_rate": 4.125593469276224e-05, "loss": 3.3472, "step": 102400 }, { "epoch": 2.63, "learning_rate": 4.124739556648564e-05, "loss": 3.0513, "step": 102500 }, { "epoch": 2.63, "learning_rate": 4.123885644020904e-05, "loss": 3.0413, "step": 102600 }, { "epoch": 2.63, "learning_rate": 4.1230317313932444e-05, "loss": 3.041, "step": 102700 }, { "epoch": 2.63, "learning_rate": 4.122177818765584e-05, "loss": 3.0476, "step": 102800 }, { "epoch": 2.64, "learning_rate": 4.121323906137925e-05, "loss": 3.0221, "step": 102900 }, { "epoch": 2.64, "learning_rate": 4.120469993510264e-05, "loss": 3.0639, "step": 103000 }, { "epoch": 2.64, "learning_rate": 4.119616080882604e-05, "loss": 3.1602, "step": 103100 }, { "epoch": 2.64, "learning_rate": 4.1187621682549445e-05, "loss": 2.9889, "step": 103200 }, { "epoch": 2.65, "learning_rate": 4.1179082556272846e-05, "loss": 3.0175, "step": 103300 }, { "epoch": 2.65, "learning_rate": 4.117054342999625e-05, "loss": 2.9935, "step": 103400 }, { "epoch": 2.65, "learning_rate": 4.116200430371965e-05, "loss": 3.168, "step": 103500 }, { "epoch": 2.65, "learning_rate": 4.1153465177443044e-05, "loss": 3.0671, "step": 103600 }, { "epoch": 2.66, "learning_rate": 4.114492605116645e-05, "loss": 3.1425, "step": 103700 }, { "epoch": 2.66, "learning_rate": 4.113638692488985e-05, "loss": 3.0317, "step": 103800 }, { "epoch": 2.66, "learning_rate": 4.112784779861325e-05, "loss": 3.0031, "step": 103900 }, { "epoch": 2.66, "learning_rate": 4.111930867233665e-05, "loss": 3.0538, "step": 104000 }, { "epoch": 2.67, "learning_rate": 4.111076954606005e-05, "loss": 2.9468, "step": 104100 }, { "epoch": 2.67, "learning_rate": 4.110223041978345e-05, "loss": 2.9273, "step": 104200 }, { "epoch": 2.67, "learning_rate": 4.1093691293506855e-05, "loss": 2.8708, "step": 104300 }, { "epoch": 2.67, "learning_rate": 4.108515216723025e-05, "loss": 3.0507, "step": 104400 }, { "epoch": 2.68, "learning_rate": 4.107661304095365e-05, "loss": 3.059, "step": 104500 }, { "epoch": 2.68, "learning_rate": 4.106807391467705e-05, "loss": 2.9688, "step": 104600 }, { "epoch": 2.68, "learning_rate": 4.1059534788400454e-05, "loss": 3.0545, "step": 104700 }, { "epoch": 2.68, "learning_rate": 4.1050995662123856e-05, "loss": 3.1936, "step": 104800 }, { "epoch": 2.69, "learning_rate": 4.104245653584726e-05, "loss": 3.158, "step": 104900 }, { "epoch": 2.69, "learning_rate": 4.103391740957065e-05, "loss": 3.0972, "step": 105000 }, { "epoch": 2.69, "learning_rate": 4.102537828329406e-05, "loss": 3.2179, "step": 105100 }, { "epoch": 2.69, "learning_rate": 4.1016839157017455e-05, "loss": 3.1017, "step": 105200 }, { "epoch": 2.7, "learning_rate": 4.100830003074086e-05, "loss": 3.0019, "step": 105300 }, { "epoch": 2.7, "learning_rate": 4.099976090446426e-05, "loss": 3.1353, "step": 105400 }, { "epoch": 2.7, "learning_rate": 4.099122177818765e-05, "loss": 3.1709, "step": 105500 }, { "epoch": 2.71, "learning_rate": 4.098268265191106e-05, "loss": 2.9705, "step": 105600 }, { "epoch": 2.71, "learning_rate": 4.0974143525634456e-05, "loss": 3.1053, "step": 105700 }, { "epoch": 2.71, "learning_rate": 4.096560439935786e-05, "loss": 3.0786, "step": 105800 }, { "epoch": 2.71, "learning_rate": 4.095706527308126e-05, "loss": 3.1098, "step": 105900 }, { "epoch": 2.72, "learning_rate": 4.094852614680466e-05, "loss": 2.8744, "step": 106000 }, { "epoch": 2.72, "learning_rate": 4.0939987020528056e-05, "loss": 2.9382, "step": 106100 }, { "epoch": 2.72, "learning_rate": 4.0931447894251464e-05, "loss": 2.9395, "step": 106200 }, { "epoch": 2.72, "learning_rate": 4.092290876797486e-05, "loss": 3.053, "step": 106300 }, { "epoch": 2.73, "learning_rate": 4.091436964169826e-05, "loss": 3.1599, "step": 106400 }, { "epoch": 2.73, "learning_rate": 4.090583051542166e-05, "loss": 3.0938, "step": 106500 }, { "epoch": 2.73, "learning_rate": 4.0897291389145063e-05, "loss": 2.9966, "step": 106600 }, { "epoch": 2.73, "learning_rate": 4.0888752262868465e-05, "loss": 3.1077, "step": 106700 }, { "epoch": 2.74, "learning_rate": 4.0880213136591867e-05, "loss": 3.1119, "step": 106800 }, { "epoch": 2.74, "learning_rate": 4.087167401031526e-05, "loss": 3.1429, "step": 106900 }, { "epoch": 2.74, "learning_rate": 4.086313488403867e-05, "loss": 3.0915, "step": 107000 }, { "epoch": 2.74, "learning_rate": 4.0854595757762064e-05, "loss": 3.324, "step": 107100 }, { "epoch": 2.75, "learning_rate": 4.0846056631485466e-05, "loss": 3.1729, "step": 107200 }, { "epoch": 2.75, "learning_rate": 4.083751750520887e-05, "loss": 2.9374, "step": 107300 }, { "epoch": 2.75, "learning_rate": 4.082897837893227e-05, "loss": 3.3158, "step": 107400 }, { "epoch": 2.75, "learning_rate": 4.082043925265567e-05, "loss": 3.0624, "step": 107500 }, { "epoch": 2.76, "learning_rate": 4.081190012637907e-05, "loss": 3.199, "step": 107600 }, { "epoch": 2.76, "learning_rate": 4.080336100010247e-05, "loss": 3.0148, "step": 107700 }, { "epoch": 2.76, "learning_rate": 4.0794821873825875e-05, "loss": 3.0849, "step": 107800 }, { "epoch": 2.76, "learning_rate": 4.078628274754927e-05, "loss": 2.9066, "step": 107900 }, { "epoch": 2.77, "learning_rate": 4.077774362127267e-05, "loss": 3.0127, "step": 108000 }, { "epoch": 2.77, "learning_rate": 4.076920449499607e-05, "loss": 2.9849, "step": 108100 }, { "epoch": 2.77, "learning_rate": 4.0760665368719475e-05, "loss": 2.8326, "step": 108200 }, { "epoch": 2.77, "learning_rate": 4.0752126242442876e-05, "loss": 3.1002, "step": 108300 }, { "epoch": 2.78, "learning_rate": 4.074358711616628e-05, "loss": 2.8001, "step": 108400 }, { "epoch": 2.78, "learning_rate": 4.073504798988967e-05, "loss": 2.9754, "step": 108500 }, { "epoch": 2.78, "learning_rate": 4.072650886361308e-05, "loss": 3.1234, "step": 108600 }, { "epoch": 2.78, "learning_rate": 4.0717969737336476e-05, "loss": 3.0748, "step": 108700 }, { "epoch": 2.79, "learning_rate": 4.070943061105988e-05, "loss": 3.0435, "step": 108800 }, { "epoch": 2.79, "learning_rate": 4.070089148478328e-05, "loss": 3.0546, "step": 108900 }, { "epoch": 2.79, "learning_rate": 4.069235235850668e-05, "loss": 3.2162, "step": 109000 }, { "epoch": 2.79, "learning_rate": 4.0683813232230075e-05, "loss": 2.8097, "step": 109100 }, { "epoch": 2.8, "learning_rate": 4.067527410595348e-05, "loss": 2.949, "step": 109200 }, { "epoch": 2.8, "learning_rate": 4.066673497967688e-05, "loss": 3.0555, "step": 109300 }, { "epoch": 2.8, "learning_rate": 4.065819585340028e-05, "loss": 2.9504, "step": 109400 }, { "epoch": 2.81, "learning_rate": 4.064965672712368e-05, "loss": 3.1909, "step": 109500 }, { "epoch": 2.81, "learning_rate": 4.064111760084708e-05, "loss": 2.7929, "step": 109600 }, { "epoch": 2.81, "learning_rate": 4.0632578474570484e-05, "loss": 3.0875, "step": 109700 }, { "epoch": 2.81, "learning_rate": 4.0624039348293886e-05, "loss": 3.0052, "step": 109800 }, { "epoch": 2.82, "learning_rate": 4.061550022201728e-05, "loss": 2.9049, "step": 109900 }, { "epoch": 2.82, "learning_rate": 4.060696109574069e-05, "loss": 3.0253, "step": 110000 }, { "epoch": 2.82, "learning_rate": 4.0598421969464084e-05, "loss": 3.1229, "step": 110100 }, { "epoch": 2.82, "learning_rate": 4.0589882843187485e-05, "loss": 2.993, "step": 110200 }, { "epoch": 2.83, "learning_rate": 4.058134371691089e-05, "loss": 3.04, "step": 110300 }, { "epoch": 2.83, "learning_rate": 4.057280459063429e-05, "loss": 3.0167, "step": 110400 }, { "epoch": 2.83, "learning_rate": 4.056426546435769e-05, "loss": 3.3009, "step": 110500 }, { "epoch": 2.83, "learning_rate": 4.055572633808109e-05, "loss": 3.1617, "step": 110600 }, { "epoch": 2.84, "learning_rate": 4.0547187211804486e-05, "loss": 3.1479, "step": 110700 }, { "epoch": 2.84, "learning_rate": 4.0538648085527895e-05, "loss": 3.1471, "step": 110800 }, { "epoch": 2.84, "learning_rate": 4.053010895925129e-05, "loss": 3.046, "step": 110900 }, { "epoch": 2.84, "learning_rate": 4.052156983297469e-05, "loss": 3.1228, "step": 111000 }, { "epoch": 2.85, "learning_rate": 4.051303070669809e-05, "loss": 2.9721, "step": 111100 }, { "epoch": 2.85, "learning_rate": 4.0504491580421494e-05, "loss": 2.9042, "step": 111200 }, { "epoch": 2.85, "learning_rate": 4.049595245414489e-05, "loss": 3.1024, "step": 111300 }, { "epoch": 2.85, "learning_rate": 4.04874133278683e-05, "loss": 3.2672, "step": 111400 }, { "epoch": 2.86, "learning_rate": 4.047887420159169e-05, "loss": 3.0472, "step": 111500 }, { "epoch": 2.86, "learning_rate": 4.0470335075315093e-05, "loss": 3.0819, "step": 111600 }, { "epoch": 2.86, "learning_rate": 4.0461795949038495e-05, "loss": 3.0193, "step": 111700 }, { "epoch": 2.86, "learning_rate": 4.0453256822761897e-05, "loss": 2.9185, "step": 111800 }, { "epoch": 2.87, "learning_rate": 4.04447176964853e-05, "loss": 3.3109, "step": 111900 }, { "epoch": 2.87, "learning_rate": 4.04361785702087e-05, "loss": 2.9213, "step": 112000 }, { "epoch": 2.87, "learning_rate": 4.0427639443932094e-05, "loss": 2.9948, "step": 112100 }, { "epoch": 2.87, "learning_rate": 4.04191003176555e-05, "loss": 2.936, "step": 112200 }, { "epoch": 2.88, "learning_rate": 4.04105611913789e-05, "loss": 3.2933, "step": 112300 }, { "epoch": 2.88, "learning_rate": 4.04020220651023e-05, "loss": 3.0799, "step": 112400 }, { "epoch": 2.88, "learning_rate": 4.03934829388257e-05, "loss": 3.0602, "step": 112500 }, { "epoch": 2.88, "learning_rate": 4.03849438125491e-05, "loss": 3.1894, "step": 112600 }, { "epoch": 2.89, "learning_rate": 4.0376404686272504e-05, "loss": 2.8404, "step": 112700 }, { "epoch": 2.89, "learning_rate": 4.0367865559995905e-05, "loss": 2.8567, "step": 112800 }, { "epoch": 2.89, "learning_rate": 4.03593264337193e-05, "loss": 2.9545, "step": 112900 }, { "epoch": 2.89, "learning_rate": 4.035078730744271e-05, "loss": 3.1068, "step": 113000 }, { "epoch": 2.9, "learning_rate": 4.03422481811661e-05, "loss": 3.0017, "step": 113100 }, { "epoch": 2.9, "learning_rate": 4.0333709054889505e-05, "loss": 2.9809, "step": 113200 }, { "epoch": 2.9, "learning_rate": 4.0325169928612906e-05, "loss": 2.9555, "step": 113300 }, { "epoch": 2.91, "learning_rate": 4.031663080233631e-05, "loss": 2.7379, "step": 113400 }, { "epoch": 2.91, "learning_rate": 4.030809167605971e-05, "loss": 2.882, "step": 113500 }, { "epoch": 2.91, "learning_rate": 4.029955254978311e-05, "loss": 2.8831, "step": 113600 }, { "epoch": 2.91, "learning_rate": 4.0291013423506506e-05, "loss": 3.051, "step": 113700 }, { "epoch": 2.92, "learning_rate": 4.0282474297229914e-05, "loss": 3.0809, "step": 113800 }, { "epoch": 2.92, "learning_rate": 4.027393517095331e-05, "loss": 2.9474, "step": 113900 }, { "epoch": 2.92, "learning_rate": 4.026539604467671e-05, "loss": 3.083, "step": 114000 }, { "epoch": 2.92, "learning_rate": 4.025685691840011e-05, "loss": 3.0755, "step": 114100 }, { "epoch": 2.93, "learning_rate": 4.024831779212351e-05, "loss": 3.2383, "step": 114200 }, { "epoch": 2.93, "learning_rate": 4.023977866584691e-05, "loss": 2.9592, "step": 114300 }, { "epoch": 2.93, "learning_rate": 4.0231239539570316e-05, "loss": 2.841, "step": 114400 }, { "epoch": 2.93, "learning_rate": 4.022270041329371e-05, "loss": 2.9557, "step": 114500 }, { "epoch": 2.94, "learning_rate": 4.021416128701711e-05, "loss": 3.0037, "step": 114600 }, { "epoch": 2.94, "learning_rate": 4.0205622160740514e-05, "loss": 2.8829, "step": 114700 }, { "epoch": 2.94, "learning_rate": 4.0197083034463916e-05, "loss": 3.0496, "step": 114800 }, { "epoch": 2.94, "learning_rate": 4.018854390818732e-05, "loss": 3.0789, "step": 114900 }, { "epoch": 2.95, "learning_rate": 4.018000478191072e-05, "loss": 2.9696, "step": 115000 }, { "epoch": 2.95, "learning_rate": 4.0171465655634114e-05, "loss": 2.9391, "step": 115100 }, { "epoch": 2.95, "learning_rate": 4.016292652935752e-05, "loss": 3.196, "step": 115200 }, { "epoch": 2.95, "learning_rate": 4.015438740308092e-05, "loss": 2.8523, "step": 115300 }, { "epoch": 2.96, "learning_rate": 4.014584827680432e-05, "loss": 3.1546, "step": 115400 }, { "epoch": 2.96, "learning_rate": 4.013730915052772e-05, "loss": 2.8606, "step": 115500 }, { "epoch": 2.96, "learning_rate": 4.012877002425112e-05, "loss": 3.0395, "step": 115600 }, { "epoch": 2.96, "learning_rate": 4.012023089797452e-05, "loss": 2.8805, "step": 115700 }, { "epoch": 2.97, "learning_rate": 4.0111691771697925e-05, "loss": 3.0013, "step": 115800 }, { "epoch": 2.97, "learning_rate": 4.010315264542132e-05, "loss": 3.0258, "step": 115900 }, { "epoch": 2.97, "learning_rate": 4.009461351914473e-05, "loss": 2.9218, "step": 116000 }, { "epoch": 2.97, "learning_rate": 4.008607439286812e-05, "loss": 3.0788, "step": 116100 }, { "epoch": 2.98, "learning_rate": 4.0077535266591524e-05, "loss": 3.0068, "step": 116200 }, { "epoch": 2.98, "learning_rate": 4.0068996140314926e-05, "loss": 3.1724, "step": 116300 }, { "epoch": 2.98, "learning_rate": 4.006045701403833e-05, "loss": 3.103, "step": 116400 }, { "epoch": 2.98, "learning_rate": 4.005191788776172e-05, "loss": 3.1552, "step": 116500 }, { "epoch": 2.99, "learning_rate": 4.004337876148513e-05, "loss": 3.036, "step": 116600 }, { "epoch": 2.99, "learning_rate": 4.0034839635208525e-05, "loss": 3.2177, "step": 116700 }, { "epoch": 2.99, "learning_rate": 4.0026300508931927e-05, "loss": 3.1162, "step": 116800 }, { "epoch": 2.99, "learning_rate": 4.001776138265533e-05, "loss": 3.0141, "step": 116900 }, { "epoch": 3.0, "learning_rate": 4.000922225637873e-05, "loss": 2.9341, "step": 117000 }, { "epoch": 3.0, "learning_rate": 4.000068313010213e-05, "loss": 2.9165, "step": 117100 }, { "epoch": 3.0, "learning_rate": 3.999214400382553e-05, "loss": 3.046, "step": 117200 }, { "epoch": 3.0, "learning_rate": 3.998360487754893e-05, "loss": 2.8234, "step": 117300 }, { "epoch": 3.01, "learning_rate": 3.9975065751272336e-05, "loss": 3.1521, "step": 117400 }, { "epoch": 3.01, "learning_rate": 3.996652662499573e-05, "loss": 3.014, "step": 117500 }, { "epoch": 3.01, "learning_rate": 3.995798749871913e-05, "loss": 3.1095, "step": 117600 }, { "epoch": 3.02, "learning_rate": 3.9949448372442534e-05, "loss": 3.0437, "step": 117700 }, { "epoch": 3.02, "learning_rate": 3.9940909246165935e-05, "loss": 2.9899, "step": 117800 }, { "epoch": 3.02, "learning_rate": 3.993237011988934e-05, "loss": 2.9003, "step": 117900 }, { "epoch": 3.02, "learning_rate": 3.992383099361274e-05, "loss": 3.0027, "step": 118000 }, { "epoch": 3.03, "learning_rate": 3.991529186733613e-05, "loss": 3.1018, "step": 118100 }, { "epoch": 3.03, "learning_rate": 3.990675274105954e-05, "loss": 2.9672, "step": 118200 }, { "epoch": 3.03, "learning_rate": 3.9898213614782936e-05, "loss": 2.9003, "step": 118300 }, { "epoch": 3.03, "learning_rate": 3.988967448850634e-05, "loss": 2.8893, "step": 118400 }, { "epoch": 3.04, "learning_rate": 3.988113536222974e-05, "loss": 3.148, "step": 118500 }, { "epoch": 3.04, "learning_rate": 3.987259623595314e-05, "loss": 2.9243, "step": 118600 }, { "epoch": 3.04, "learning_rate": 3.986405710967654e-05, "loss": 2.9773, "step": 118700 }, { "epoch": 3.04, "learning_rate": 3.9855517983399944e-05, "loss": 2.8417, "step": 118800 }, { "epoch": 3.05, "learning_rate": 3.984697885712334e-05, "loss": 3.0683, "step": 118900 }, { "epoch": 3.05, "learning_rate": 3.983843973084675e-05, "loss": 2.9911, "step": 119000 }, { "epoch": 3.05, "learning_rate": 3.982990060457014e-05, "loss": 2.8986, "step": 119100 }, { "epoch": 3.05, "learning_rate": 3.982136147829354e-05, "loss": 3.2045, "step": 119200 }, { "epoch": 3.06, "learning_rate": 3.9812822352016945e-05, "loss": 2.8607, "step": 119300 }, { "epoch": 3.06, "learning_rate": 3.9804283225740346e-05, "loss": 2.9648, "step": 119400 }, { "epoch": 3.06, "learning_rate": 3.979574409946374e-05, "loss": 2.9939, "step": 119500 }, { "epoch": 3.06, "learning_rate": 3.978720497318715e-05, "loss": 3.1878, "step": 119600 }, { "epoch": 3.07, "learning_rate": 3.9778665846910544e-05, "loss": 2.9334, "step": 119700 }, { "epoch": 3.07, "learning_rate": 3.9770126720633946e-05, "loss": 2.9575, "step": 119800 }, { "epoch": 3.07, "learning_rate": 3.976158759435735e-05, "loss": 2.8912, "step": 119900 }, { "epoch": 3.07, "learning_rate": 3.975304846808075e-05, "loss": 2.7881, "step": 120000 }, { "epoch": 3.08, "learning_rate": 3.974450934180415e-05, "loss": 3.0064, "step": 120100 }, { "epoch": 3.08, "learning_rate": 3.973597021552755e-05, "loss": 2.963, "step": 120200 }, { "epoch": 3.08, "learning_rate": 3.972743108925095e-05, "loss": 3.2183, "step": 120300 }, { "epoch": 3.08, "learning_rate": 3.9718891962974355e-05, "loss": 2.8965, "step": 120400 }, { "epoch": 3.09, "learning_rate": 3.971035283669775e-05, "loss": 2.8146, "step": 120500 }, { "epoch": 3.09, "learning_rate": 3.970181371042115e-05, "loss": 2.9657, "step": 120600 }, { "epoch": 3.09, "learning_rate": 3.969327458414455e-05, "loss": 3.0492, "step": 120700 }, { "epoch": 3.09, "learning_rate": 3.9684735457867955e-05, "loss": 3.0475, "step": 120800 }, { "epoch": 3.1, "learning_rate": 3.9676196331591356e-05, "loss": 2.8418, "step": 120900 }, { "epoch": 3.1, "learning_rate": 3.966765720531476e-05, "loss": 2.9023, "step": 121000 }, { "epoch": 3.1, "learning_rate": 3.965911807903815e-05, "loss": 2.9436, "step": 121100 }, { "epoch": 3.1, "learning_rate": 3.965057895276156e-05, "loss": 3.025, "step": 121200 }, { "epoch": 3.11, "learning_rate": 3.9642039826484956e-05, "loss": 3.0259, "step": 121300 }, { "epoch": 3.11, "learning_rate": 3.963350070020836e-05, "loss": 3.0025, "step": 121400 }, { "epoch": 3.11, "learning_rate": 3.962496157393176e-05, "loss": 2.8463, "step": 121500 }, { "epoch": 3.12, "learning_rate": 3.961642244765516e-05, "loss": 3.0121, "step": 121600 }, { "epoch": 3.12, "learning_rate": 3.9607883321378555e-05, "loss": 2.9603, "step": 121700 }, { "epoch": 3.12, "learning_rate": 3.959934419510196e-05, "loss": 2.9895, "step": 121800 }, { "epoch": 3.12, "learning_rate": 3.959080506882536e-05, "loss": 2.9055, "step": 121900 }, { "epoch": 3.13, "learning_rate": 3.9582265942548766e-05, "loss": 2.9311, "step": 122000 }, { "epoch": 3.13, "learning_rate": 3.957372681627216e-05, "loss": 2.9778, "step": 122100 }, { "epoch": 3.13, "learning_rate": 3.956518768999556e-05, "loss": 2.9807, "step": 122200 }, { "epoch": 3.13, "learning_rate": 3.9556648563718964e-05, "loss": 3.1473, "step": 122300 }, { "epoch": 3.14, "learning_rate": 3.9548109437442366e-05, "loss": 3.1106, "step": 122400 }, { "epoch": 3.14, "learning_rate": 3.953957031116576e-05, "loss": 2.8529, "step": 122500 }, { "epoch": 3.14, "learning_rate": 3.953103118488917e-05, "loss": 3.0184, "step": 122600 }, { "epoch": 3.14, "learning_rate": 3.9522492058612564e-05, "loss": 3.0022, "step": 122700 }, { "epoch": 3.15, "learning_rate": 3.9513952932335965e-05, "loss": 3.1624, "step": 122800 }, { "epoch": 3.15, "learning_rate": 3.950541380605937e-05, "loss": 2.8583, "step": 122900 }, { "epoch": 3.15, "learning_rate": 3.949687467978276e-05, "loss": 2.9685, "step": 123000 }, { "epoch": 3.15, "learning_rate": 3.948833555350617e-05, "loss": 3.1809, "step": 123100 }, { "epoch": 3.16, "learning_rate": 3.9479796427229565e-05, "loss": 3.116, "step": 123200 }, { "epoch": 3.16, "learning_rate": 3.9471257300952966e-05, "loss": 3.0942, "step": 123300 }, { "epoch": 3.16, "learning_rate": 3.946271817467637e-05, "loss": 3.0704, "step": 123400 }, { "epoch": 3.16, "learning_rate": 3.945417904839977e-05, "loss": 3.0432, "step": 123500 }, { "epoch": 3.17, "learning_rate": 3.944563992212317e-05, "loss": 2.9057, "step": 123600 }, { "epoch": 3.17, "learning_rate": 3.943710079584657e-05, "loss": 2.9233, "step": 123700 }, { "epoch": 3.17, "learning_rate": 3.942856166956997e-05, "loss": 2.9568, "step": 123800 }, { "epoch": 3.17, "learning_rate": 3.9420022543293375e-05, "loss": 2.9892, "step": 123900 }, { "epoch": 3.18, "learning_rate": 3.941148341701677e-05, "loss": 3.1241, "step": 124000 }, { "epoch": 3.18, "learning_rate": 3.940294429074017e-05, "loss": 3.1531, "step": 124100 }, { "epoch": 3.18, "learning_rate": 3.939440516446357e-05, "loss": 2.8468, "step": 124200 }, { "epoch": 3.18, "learning_rate": 3.9385866038186975e-05, "loss": 3.1295, "step": 124300 }, { "epoch": 3.19, "learning_rate": 3.937732691191037e-05, "loss": 2.959, "step": 124400 }, { "epoch": 3.19, "learning_rate": 3.936878778563378e-05, "loss": 2.9155, "step": 124500 }, { "epoch": 3.19, "learning_rate": 3.936024865935717e-05, "loss": 3.0969, "step": 124600 }, { "epoch": 3.19, "learning_rate": 3.9351709533080574e-05, "loss": 2.978, "step": 124700 }, { "epoch": 3.2, "learning_rate": 3.9343170406803976e-05, "loss": 3.0562, "step": 124800 }, { "epoch": 3.2, "learning_rate": 3.933463128052738e-05, "loss": 2.9286, "step": 124900 }, { "epoch": 3.2, "learning_rate": 3.932609215425078e-05, "loss": 2.9911, "step": 125000 }, { "epoch": 3.2, "learning_rate": 3.931755302797418e-05, "loss": 2.9229, "step": 125100 }, { "epoch": 3.21, "learning_rate": 3.9309013901697575e-05, "loss": 3.0515, "step": 125200 }, { "epoch": 3.21, "learning_rate": 3.9300474775420984e-05, "loss": 3.0484, "step": 125300 }, { "epoch": 3.21, "learning_rate": 3.929193564914438e-05, "loss": 2.9142, "step": 125400 }, { "epoch": 3.21, "learning_rate": 3.928339652286778e-05, "loss": 3.0388, "step": 125500 }, { "epoch": 3.22, "learning_rate": 3.927485739659118e-05, "loss": 2.9048, "step": 125600 }, { "epoch": 3.22, "learning_rate": 3.926631827031458e-05, "loss": 2.8863, "step": 125700 }, { "epoch": 3.22, "learning_rate": 3.9257779144037985e-05, "loss": 2.9024, "step": 125800 }, { "epoch": 3.23, "learning_rate": 3.9249240017761386e-05, "loss": 2.828, "step": 125900 }, { "epoch": 3.23, "learning_rate": 3.924070089148478e-05, "loss": 2.8643, "step": 126000 }, { "epoch": 3.23, "learning_rate": 3.923216176520819e-05, "loss": 3.0604, "step": 126100 }, { "epoch": 3.23, "learning_rate": 3.9223622638931584e-05, "loss": 3.0527, "step": 126200 }, { "epoch": 3.24, "learning_rate": 3.9215083512654985e-05, "loss": 3.0711, "step": 126300 }, { "epoch": 3.24, "learning_rate": 3.920654438637839e-05, "loss": 2.9576, "step": 126400 }, { "epoch": 3.24, "learning_rate": 3.919800526010179e-05, "loss": 2.8507, "step": 126500 }, { "epoch": 3.24, "learning_rate": 3.918946613382518e-05, "loss": 2.9436, "step": 126600 }, { "epoch": 3.25, "learning_rate": 3.918092700754859e-05, "loss": 3.0334, "step": 126700 }, { "epoch": 3.25, "learning_rate": 3.9172387881271986e-05, "loss": 3.071, "step": 126800 }, { "epoch": 3.25, "learning_rate": 3.9163848754995395e-05, "loss": 2.9778, "step": 126900 }, { "epoch": 3.25, "learning_rate": 3.915530962871879e-05, "loss": 3.2001, "step": 127000 }, { "epoch": 3.26, "learning_rate": 3.914677050244219e-05, "loss": 3.0323, "step": 127100 }, { "epoch": 3.26, "learning_rate": 3.913823137616559e-05, "loss": 2.9486, "step": 127200 }, { "epoch": 3.26, "learning_rate": 3.9129692249888994e-05, "loss": 2.965, "step": 127300 }, { "epoch": 3.26, "learning_rate": 3.912115312361239e-05, "loss": 2.7533, "step": 127400 }, { "epoch": 3.27, "learning_rate": 3.91126139973358e-05, "loss": 2.985, "step": 127500 }, { "epoch": 3.27, "learning_rate": 3.910407487105919e-05, "loss": 2.9597, "step": 127600 }, { "epoch": 3.27, "learning_rate": 3.9095535744782594e-05, "loss": 3.1081, "step": 127700 }, { "epoch": 3.27, "learning_rate": 3.9086996618505995e-05, "loss": 2.8903, "step": 127800 }, { "epoch": 3.28, "learning_rate": 3.90784574922294e-05, "loss": 2.6809, "step": 127900 }, { "epoch": 3.28, "learning_rate": 3.90699183659528e-05, "loss": 3.1497, "step": 128000 }, { "epoch": 3.28, "learning_rate": 3.90613792396762e-05, "loss": 3.027, "step": 128100 }, { "epoch": 3.28, "learning_rate": 3.9052840113399595e-05, "loss": 3.0816, "step": 128200 }, { "epoch": 3.29, "learning_rate": 3.9044300987123e-05, "loss": 2.9045, "step": 128300 }, { "epoch": 3.29, "learning_rate": 3.90357618608464e-05, "loss": 3.1785, "step": 128400 }, { "epoch": 3.29, "learning_rate": 3.90272227345698e-05, "loss": 2.9222, "step": 128500 }, { "epoch": 3.29, "learning_rate": 3.90186836082932e-05, "loss": 3.013, "step": 128600 }, { "epoch": 3.3, "learning_rate": 3.90101444820166e-05, "loss": 2.863, "step": 128700 }, { "epoch": 3.3, "learning_rate": 3.9001605355740004e-05, "loss": 3.0084, "step": 128800 }, { "epoch": 3.3, "learning_rate": 3.8993066229463405e-05, "loss": 2.9316, "step": 128900 }, { "epoch": 3.3, "learning_rate": 3.89845271031868e-05, "loss": 2.8692, "step": 129000 }, { "epoch": 3.31, "learning_rate": 3.897598797691021e-05, "loss": 2.8845, "step": 129100 }, { "epoch": 3.31, "learning_rate": 3.89674488506336e-05, "loss": 2.9282, "step": 129200 }, { "epoch": 3.31, "learning_rate": 3.8958909724357005e-05, "loss": 2.9483, "step": 129300 }, { "epoch": 3.31, "learning_rate": 3.8950370598080406e-05, "loss": 3.0091, "step": 129400 }, { "epoch": 3.32, "learning_rate": 3.894183147180381e-05, "loss": 2.9191, "step": 129500 }, { "epoch": 3.32, "learning_rate": 3.89332923455272e-05, "loss": 3.0748, "step": 129600 }, { "epoch": 3.32, "learning_rate": 3.892475321925061e-05, "loss": 2.9653, "step": 129700 }, { "epoch": 3.33, "learning_rate": 3.8916214092974006e-05, "loss": 2.7149, "step": 129800 }, { "epoch": 3.33, "learning_rate": 3.890767496669741e-05, "loss": 3.0052, "step": 129900 }, { "epoch": 3.33, "learning_rate": 3.889913584042081e-05, "loss": 3.1336, "step": 130000 }, { "epoch": 3.33, "learning_rate": 3.889059671414421e-05, "loss": 2.8969, "step": 130100 }, { "epoch": 3.34, "learning_rate": 3.888205758786761e-05, "loss": 2.817, "step": 130200 }, { "epoch": 3.34, "learning_rate": 3.8873518461591014e-05, "loss": 2.995, "step": 130300 }, { "epoch": 3.34, "learning_rate": 3.886497933531441e-05, "loss": 2.8422, "step": 130400 }, { "epoch": 3.34, "learning_rate": 3.885644020903782e-05, "loss": 2.9654, "step": 130500 }, { "epoch": 3.35, "learning_rate": 3.884790108276121e-05, "loss": 2.793, "step": 130600 }, { "epoch": 3.35, "learning_rate": 3.883936195648461e-05, "loss": 3.042, "step": 130700 }, { "epoch": 3.35, "learning_rate": 3.8830822830208015e-05, "loss": 3.0508, "step": 130800 }, { "epoch": 3.35, "learning_rate": 3.8822283703931416e-05, "loss": 2.8977, "step": 130900 }, { "epoch": 3.36, "learning_rate": 3.881374457765482e-05, "loss": 2.8101, "step": 131000 }, { "epoch": 3.36, "learning_rate": 3.880520545137822e-05, "loss": 2.9051, "step": 131100 }, { "epoch": 3.36, "learning_rate": 3.8796666325101614e-05, "loss": 2.9152, "step": 131200 }, { "epoch": 3.36, "learning_rate": 3.878812719882502e-05, "loss": 2.9743, "step": 131300 }, { "epoch": 3.37, "learning_rate": 3.877958807254842e-05, "loss": 3.0049, "step": 131400 }, { "epoch": 3.37, "learning_rate": 3.877104894627182e-05, "loss": 3.0142, "step": 131500 }, { "epoch": 3.37, "learning_rate": 3.876250981999522e-05, "loss": 2.9883, "step": 131600 }, { "epoch": 3.37, "learning_rate": 3.875397069371862e-05, "loss": 2.9499, "step": 131700 }, { "epoch": 3.38, "learning_rate": 3.874543156744202e-05, "loss": 2.8573, "step": 131800 }, { "epoch": 3.38, "learning_rate": 3.8736892441165425e-05, "loss": 2.9085, "step": 131900 }, { "epoch": 3.38, "learning_rate": 3.872835331488882e-05, "loss": 2.8178, "step": 132000 }, { "epoch": 3.38, "learning_rate": 3.871981418861223e-05, "loss": 2.8433, "step": 132100 }, { "epoch": 3.39, "learning_rate": 3.871127506233562e-05, "loss": 2.8564, "step": 132200 }, { "epoch": 3.39, "learning_rate": 3.8702735936059024e-05, "loss": 2.9895, "step": 132300 }, { "epoch": 3.39, "learning_rate": 3.8694196809782426e-05, "loss": 3.0891, "step": 132400 }, { "epoch": 3.39, "learning_rate": 3.868565768350583e-05, "loss": 2.7649, "step": 132500 }, { "epoch": 3.4, "learning_rate": 3.867711855722922e-05, "loss": 3.0051, "step": 132600 }, { "epoch": 3.4, "learning_rate": 3.866857943095263e-05, "loss": 2.9748, "step": 132700 }, { "epoch": 3.4, "learning_rate": 3.8660040304676025e-05, "loss": 2.8772, "step": 132800 }, { "epoch": 3.4, "learning_rate": 3.865150117839943e-05, "loss": 3.038, "step": 132900 }, { "epoch": 3.41, "learning_rate": 3.864296205212283e-05, "loss": 2.9276, "step": 133000 }, { "epoch": 3.41, "learning_rate": 3.863442292584623e-05, "loss": 2.8391, "step": 133100 }, { "epoch": 3.41, "learning_rate": 3.862588379956963e-05, "loss": 2.8938, "step": 133200 }, { "epoch": 3.41, "learning_rate": 3.861734467329303e-05, "loss": 2.5677, "step": 133300 }, { "epoch": 3.42, "learning_rate": 3.860880554701643e-05, "loss": 2.934, "step": 133400 }, { "epoch": 3.42, "learning_rate": 3.8600266420739836e-05, "loss": 2.9272, "step": 133500 }, { "epoch": 3.42, "learning_rate": 3.859172729446323e-05, "loss": 3.0463, "step": 133600 }, { "epoch": 3.43, "learning_rate": 3.858318816818663e-05, "loss": 3.1404, "step": 133700 }, { "epoch": 3.43, "learning_rate": 3.8574649041910034e-05, "loss": 2.9115, "step": 133800 }, { "epoch": 3.43, "learning_rate": 3.8566109915633435e-05, "loss": 2.9446, "step": 133900 }, { "epoch": 3.43, "learning_rate": 3.855757078935684e-05, "loss": 2.7483, "step": 134000 }, { "epoch": 3.44, "learning_rate": 3.854903166308024e-05, "loss": 3.0783, "step": 134100 }, { "epoch": 3.44, "learning_rate": 3.854049253680363e-05, "loss": 2.9553, "step": 134200 }, { "epoch": 3.44, "learning_rate": 3.853195341052704e-05, "loss": 2.896, "step": 134300 }, { "epoch": 3.44, "learning_rate": 3.8523414284250436e-05, "loss": 3.1303, "step": 134400 }, { "epoch": 3.45, "learning_rate": 3.851487515797384e-05, "loss": 3.0356, "step": 134500 }, { "epoch": 3.45, "learning_rate": 3.850633603169724e-05, "loss": 2.8436, "step": 134600 }, { "epoch": 3.45, "learning_rate": 3.849779690542064e-05, "loss": 2.9615, "step": 134700 }, { "epoch": 3.45, "learning_rate": 3.8489257779144036e-05, "loss": 2.8318, "step": 134800 }, { "epoch": 3.46, "learning_rate": 3.8480718652867444e-05, "loss": 3.1643, "step": 134900 }, { "epoch": 3.46, "learning_rate": 3.847217952659084e-05, "loss": 3.0151, "step": 135000 }, { "epoch": 3.46, "learning_rate": 3.846364040031424e-05, "loss": 3.0796, "step": 135100 }, { "epoch": 3.46, "learning_rate": 3.845510127403764e-05, "loss": 2.7781, "step": 135200 }, { "epoch": 3.47, "learning_rate": 3.8446562147761044e-05, "loss": 3.1224, "step": 135300 }, { "epoch": 3.47, "learning_rate": 3.8438023021484445e-05, "loss": 2.6578, "step": 135400 }, { "epoch": 3.47, "learning_rate": 3.842948389520785e-05, "loss": 2.9786, "step": 135500 }, { "epoch": 3.47, "learning_rate": 3.842094476893124e-05, "loss": 2.8441, "step": 135600 }, { "epoch": 3.48, "learning_rate": 3.841240564265465e-05, "loss": 2.7938, "step": 135700 }, { "epoch": 3.48, "learning_rate": 3.8403866516378044e-05, "loss": 3.0032, "step": 135800 }, { "epoch": 3.48, "learning_rate": 3.8395327390101446e-05, "loss": 2.8721, "step": 135900 }, { "epoch": 3.48, "learning_rate": 3.838678826382485e-05, "loss": 2.8084, "step": 136000 }, { "epoch": 3.49, "learning_rate": 3.837824913754825e-05, "loss": 2.8603, "step": 136100 }, { "epoch": 3.49, "learning_rate": 3.836971001127165e-05, "loss": 3.0363, "step": 136200 }, { "epoch": 3.49, "learning_rate": 3.836117088499505e-05, "loss": 2.9738, "step": 136300 }, { "epoch": 3.49, "learning_rate": 3.835263175871845e-05, "loss": 2.9322, "step": 136400 }, { "epoch": 3.5, "learning_rate": 3.8344092632441855e-05, "loss": 3.1189, "step": 136500 }, { "epoch": 3.5, "learning_rate": 3.833555350616525e-05, "loss": 2.8642, "step": 136600 }, { "epoch": 3.5, "learning_rate": 3.832701437988865e-05, "loss": 2.8171, "step": 136700 }, { "epoch": 3.5, "learning_rate": 3.831847525361205e-05, "loss": 3.0146, "step": 136800 }, { "epoch": 3.51, "learning_rate": 3.8309936127335455e-05, "loss": 2.9867, "step": 136900 }, { "epoch": 3.51, "learning_rate": 3.8301397001058856e-05, "loss": 2.8355, "step": 137000 }, { "epoch": 3.51, "learning_rate": 3.829285787478226e-05, "loss": 2.7731, "step": 137100 }, { "epoch": 3.51, "learning_rate": 3.828431874850565e-05, "loss": 2.9714, "step": 137200 }, { "epoch": 3.52, "learning_rate": 3.827577962222906e-05, "loss": 2.9709, "step": 137300 }, { "epoch": 3.52, "learning_rate": 3.8267240495952456e-05, "loss": 3.0123, "step": 137400 }, { "epoch": 3.52, "learning_rate": 3.825870136967586e-05, "loss": 2.9678, "step": 137500 }, { "epoch": 3.52, "learning_rate": 3.825016224339926e-05, "loss": 2.7263, "step": 137600 }, { "epoch": 3.53, "learning_rate": 3.824162311712266e-05, "loss": 2.9291, "step": 137700 }, { "epoch": 3.53, "learning_rate": 3.8233083990846055e-05, "loss": 2.848, "step": 137800 }, { "epoch": 3.53, "learning_rate": 3.8224544864569463e-05, "loss": 2.8997, "step": 137900 }, { "epoch": 3.54, "learning_rate": 3.821600573829286e-05, "loss": 3.0481, "step": 138000 }, { "epoch": 3.54, "learning_rate": 3.820746661201626e-05, "loss": 3.079, "step": 138100 }, { "epoch": 3.54, "learning_rate": 3.819892748573966e-05, "loss": 2.9975, "step": 138200 }, { "epoch": 3.54, "learning_rate": 3.819038835946306e-05, "loss": 3.0374, "step": 138300 }, { "epoch": 3.55, "learning_rate": 3.8181849233186464e-05, "loss": 2.912, "step": 138400 }, { "epoch": 3.55, "learning_rate": 3.8173310106909866e-05, "loss": 3.0378, "step": 138500 }, { "epoch": 3.55, "learning_rate": 3.816477098063326e-05, "loss": 2.9073, "step": 138600 }, { "epoch": 3.55, "learning_rate": 3.815623185435667e-05, "loss": 2.9153, "step": 138700 }, { "epoch": 3.56, "learning_rate": 3.8147692728080064e-05, "loss": 2.8937, "step": 138800 }, { "epoch": 3.56, "learning_rate": 3.8139153601803465e-05, "loss": 2.9533, "step": 138900 }, { "epoch": 3.56, "learning_rate": 3.813061447552687e-05, "loss": 2.923, "step": 139000 }, { "epoch": 3.56, "learning_rate": 3.812207534925027e-05, "loss": 2.9185, "step": 139100 }, { "epoch": 3.57, "learning_rate": 3.811353622297367e-05, "loss": 3.0708, "step": 139200 }, { "epoch": 3.57, "learning_rate": 3.810499709669707e-05, "loss": 3.0239, "step": 139300 }, { "epoch": 3.57, "learning_rate": 3.8096457970420466e-05, "loss": 3.0284, "step": 139400 }, { "epoch": 3.57, "learning_rate": 3.8087918844143875e-05, "loss": 2.762, "step": 139500 }, { "epoch": 3.58, "learning_rate": 3.807937971786727e-05, "loss": 2.975, "step": 139600 }, { "epoch": 3.58, "learning_rate": 3.807084059159067e-05, "loss": 3.1, "step": 139700 }, { "epoch": 3.58, "learning_rate": 3.806230146531407e-05, "loss": 2.9565, "step": 139800 }, { "epoch": 3.58, "learning_rate": 3.8053762339037474e-05, "loss": 2.8751, "step": 139900 }, { "epoch": 3.59, "learning_rate": 3.804522321276087e-05, "loss": 2.9852, "step": 140000 }, { "epoch": 3.59, "learning_rate": 3.803668408648428e-05, "loss": 2.8591, "step": 140100 }, { "epoch": 3.59, "learning_rate": 3.802814496020767e-05, "loss": 2.8044, "step": 140200 }, { "epoch": 3.59, "learning_rate": 3.8019605833931073e-05, "loss": 3.0422, "step": 140300 }, { "epoch": 3.6, "learning_rate": 3.8011066707654475e-05, "loss": 2.9013, "step": 140400 }, { "epoch": 3.6, "learning_rate": 3.800252758137787e-05, "loss": 2.9534, "step": 140500 }, { "epoch": 3.6, "learning_rate": 3.799398845510128e-05, "loss": 2.9336, "step": 140600 }, { "epoch": 3.6, "learning_rate": 3.798544932882467e-05, "loss": 3.0396, "step": 140700 }, { "epoch": 3.61, "learning_rate": 3.7976910202548074e-05, "loss": 2.8578, "step": 140800 }, { "epoch": 3.61, "learning_rate": 3.7968371076271476e-05, "loss": 3.0548, "step": 140900 }, { "epoch": 3.61, "learning_rate": 3.795983194999488e-05, "loss": 2.7049, "step": 141000 }, { "epoch": 3.61, "learning_rate": 3.795129282371828e-05, "loss": 2.9947, "step": 141100 }, { "epoch": 3.62, "learning_rate": 3.794275369744168e-05, "loss": 2.8426, "step": 141200 }, { "epoch": 3.62, "learning_rate": 3.7934214571165075e-05, "loss": 2.8844, "step": 141300 }, { "epoch": 3.62, "learning_rate": 3.7925675444888484e-05, "loss": 2.7712, "step": 141400 }, { "epoch": 3.62, "learning_rate": 3.791713631861188e-05, "loss": 2.8845, "step": 141500 }, { "epoch": 3.63, "learning_rate": 3.790859719233528e-05, "loss": 2.8524, "step": 141600 }, { "epoch": 3.63, "learning_rate": 3.790005806605868e-05, "loss": 2.6535, "step": 141700 }, { "epoch": 3.63, "learning_rate": 3.789151893978208e-05, "loss": 2.9005, "step": 141800 }, { "epoch": 3.64, "learning_rate": 3.7882979813505485e-05, "loss": 2.9857, "step": 141900 }, { "epoch": 3.64, "learning_rate": 3.7874440687228886e-05, "loss": 2.7747, "step": 142000 }, { "epoch": 3.64, "learning_rate": 3.786590156095228e-05, "loss": 3.0335, "step": 142100 }, { "epoch": 3.64, "learning_rate": 3.785736243467569e-05, "loss": 2.82, "step": 142200 }, { "epoch": 3.65, "learning_rate": 3.7848823308399084e-05, "loss": 3.0067, "step": 142300 }, { "epoch": 3.65, "learning_rate": 3.7840284182122486e-05, "loss": 2.9935, "step": 142400 }, { "epoch": 3.65, "learning_rate": 3.783174505584589e-05, "loss": 2.9691, "step": 142500 }, { "epoch": 3.65, "learning_rate": 3.782320592956929e-05, "loss": 2.9787, "step": 142600 }, { "epoch": 3.66, "learning_rate": 3.7814666803292684e-05, "loss": 3.0089, "step": 142700 }, { "epoch": 3.66, "learning_rate": 3.780612767701609e-05, "loss": 3.0469, "step": 142800 }, { "epoch": 3.66, "learning_rate": 3.779758855073949e-05, "loss": 3.0362, "step": 142900 }, { "epoch": 3.66, "learning_rate": 3.778904942446289e-05, "loss": 2.9551, "step": 143000 }, { "epoch": 3.67, "learning_rate": 3.778051029818629e-05, "loss": 2.6902, "step": 143100 }, { "epoch": 3.67, "learning_rate": 3.777197117190969e-05, "loss": 2.8595, "step": 143200 }, { "epoch": 3.67, "learning_rate": 3.776343204563309e-05, "loss": 2.8926, "step": 143300 }, { "epoch": 3.67, "learning_rate": 3.7754892919356494e-05, "loss": 2.8702, "step": 143400 }, { "epoch": 3.68, "learning_rate": 3.774635379307989e-05, "loss": 2.9157, "step": 143500 }, { "epoch": 3.68, "learning_rate": 3.77378146668033e-05, "loss": 2.8203, "step": 143600 }, { "epoch": 3.68, "learning_rate": 3.772927554052669e-05, "loss": 2.9764, "step": 143700 }, { "epoch": 3.68, "learning_rate": 3.7720736414250094e-05, "loss": 3.0338, "step": 143800 }, { "epoch": 3.69, "learning_rate": 3.7712197287973495e-05, "loss": 2.8205, "step": 143900 }, { "epoch": 3.69, "learning_rate": 3.77036581616969e-05, "loss": 2.9368, "step": 144000 }, { "epoch": 3.69, "learning_rate": 3.76951190354203e-05, "loss": 3.0391, "step": 144100 }, { "epoch": 3.69, "learning_rate": 3.76865799091437e-05, "loss": 2.9394, "step": 144200 }, { "epoch": 3.7, "learning_rate": 3.7678040782867095e-05, "loss": 3.0125, "step": 144300 }, { "epoch": 3.7, "learning_rate": 3.76695016565905e-05, "loss": 2.9793, "step": 144400 }, { "epoch": 3.7, "learning_rate": 3.76609625303139e-05, "loss": 3.0127, "step": 144500 }, { "epoch": 3.7, "learning_rate": 3.76524234040373e-05, "loss": 2.9295, "step": 144600 }, { "epoch": 3.71, "learning_rate": 3.76438842777607e-05, "loss": 2.926, "step": 144700 }, { "epoch": 3.71, "learning_rate": 3.76353451514841e-05, "loss": 2.8679, "step": 144800 }, { "epoch": 3.71, "learning_rate": 3.76268060252075e-05, "loss": 2.874, "step": 144900 }, { "epoch": 3.71, "learning_rate": 3.7618266898930906e-05, "loss": 2.8243, "step": 145000 }, { "epoch": 3.72, "learning_rate": 3.76097277726543e-05, "loss": 2.6775, "step": 145100 }, { "epoch": 3.72, "learning_rate": 3.76011886463777e-05, "loss": 2.9936, "step": 145200 }, { "epoch": 3.72, "learning_rate": 3.7592649520101103e-05, "loss": 2.87, "step": 145300 }, { "epoch": 3.72, "learning_rate": 3.7584110393824505e-05, "loss": 2.6478, "step": 145400 }, { "epoch": 3.73, "learning_rate": 3.7575571267547907e-05, "loss": 3.1228, "step": 145500 }, { "epoch": 3.73, "learning_rate": 3.756703214127131e-05, "loss": 2.9155, "step": 145600 }, { "epoch": 3.73, "learning_rate": 3.75584930149947e-05, "loss": 2.9365, "step": 145700 }, { "epoch": 3.74, "learning_rate": 3.754995388871811e-05, "loss": 2.9418, "step": 145800 }, { "epoch": 3.74, "learning_rate": 3.7541414762441506e-05, "loss": 2.8428, "step": 145900 }, { "epoch": 3.74, "learning_rate": 3.753287563616491e-05, "loss": 2.9287, "step": 146000 }, { "epoch": 3.74, "learning_rate": 3.752433650988831e-05, "loss": 2.7329, "step": 146100 }, { "epoch": 3.75, "learning_rate": 3.751579738361171e-05, "loss": 2.9242, "step": 146200 }, { "epoch": 3.75, "learning_rate": 3.750725825733511e-05, "loss": 3.0612, "step": 146300 }, { "epoch": 3.75, "learning_rate": 3.7498719131058514e-05, "loss": 2.8653, "step": 146400 }, { "epoch": 3.75, "learning_rate": 3.749018000478191e-05, "loss": 2.989, "step": 146500 }, { "epoch": 3.76, "learning_rate": 3.748164087850532e-05, "loss": 2.9249, "step": 146600 }, { "epoch": 3.76, "learning_rate": 3.747310175222871e-05, "loss": 2.9209, "step": 146700 }, { "epoch": 3.76, "learning_rate": 3.746456262595211e-05, "loss": 2.7595, "step": 146800 }, { "epoch": 3.76, "learning_rate": 3.7456023499675515e-05, "loss": 2.8333, "step": 146900 }, { "epoch": 3.77, "learning_rate": 3.7447484373398916e-05, "loss": 2.886, "step": 147000 }, { "epoch": 3.77, "learning_rate": 3.743894524712232e-05, "loss": 2.806, "step": 147100 }, { "epoch": 3.77, "learning_rate": 3.743040612084572e-05, "loss": 2.8587, "step": 147200 }, { "epoch": 3.77, "learning_rate": 3.7421866994569114e-05, "loss": 2.8406, "step": 147300 }, { "epoch": 3.78, "learning_rate": 3.741332786829252e-05, "loss": 2.7372, "step": 147400 }, { "epoch": 3.78, "learning_rate": 3.740478874201592e-05, "loss": 3.1244, "step": 147500 }, { "epoch": 3.78, "learning_rate": 3.739624961573932e-05, "loss": 2.8076, "step": 147600 }, { "epoch": 3.78, "learning_rate": 3.738771048946272e-05, "loss": 2.7125, "step": 147700 }, { "epoch": 3.79, "learning_rate": 3.737917136318612e-05, "loss": 2.8492, "step": 147800 }, { "epoch": 3.79, "learning_rate": 3.737063223690952e-05, "loss": 2.9047, "step": 147900 }, { "epoch": 3.79, "learning_rate": 3.7362093110632925e-05, "loss": 2.7274, "step": 148000 }, { "epoch": 3.79, "learning_rate": 3.735355398435632e-05, "loss": 2.96, "step": 148100 }, { "epoch": 3.8, "learning_rate": 3.734501485807972e-05, "loss": 2.8808, "step": 148200 }, { "epoch": 3.8, "learning_rate": 3.733647573180312e-05, "loss": 2.749, "step": 148300 }, { "epoch": 3.8, "learning_rate": 3.7327936605526524e-05, "loss": 2.6918, "step": 148400 }, { "epoch": 3.8, "learning_rate": 3.7319397479249926e-05, "loss": 3.0696, "step": 148500 }, { "epoch": 3.81, "learning_rate": 3.731085835297333e-05, "loss": 2.9101, "step": 148600 }, { "epoch": 3.81, "learning_rate": 3.730231922669672e-05, "loss": 3.0157, "step": 148700 }, { "epoch": 3.81, "learning_rate": 3.729378010042013e-05, "loss": 3.138, "step": 148800 }, { "epoch": 3.81, "learning_rate": 3.7285240974143525e-05, "loss": 2.9909, "step": 148900 }, { "epoch": 3.82, "learning_rate": 3.727670184786693e-05, "loss": 2.6905, "step": 149000 }, { "epoch": 3.82, "learning_rate": 3.726816272159033e-05, "loss": 2.7996, "step": 149100 }, { "epoch": 3.82, "learning_rate": 3.725962359531373e-05, "loss": 2.8014, "step": 149200 }, { "epoch": 3.82, "learning_rate": 3.725108446903713e-05, "loss": 2.914, "step": 149300 }, { "epoch": 3.83, "learning_rate": 3.724254534276053e-05, "loss": 2.9544, "step": 149400 }, { "epoch": 3.83, "learning_rate": 3.723400621648393e-05, "loss": 2.7879, "step": 149500 }, { "epoch": 3.83, "learning_rate": 3.7225467090207336e-05, "loss": 2.741, "step": 149600 }, { "epoch": 3.83, "learning_rate": 3.721692796393073e-05, "loss": 2.9305, "step": 149700 }, { "epoch": 3.84, "learning_rate": 3.720838883765413e-05, "loss": 2.8774, "step": 149800 }, { "epoch": 3.84, "learning_rate": 3.7199849711377534e-05, "loss": 2.8693, "step": 149900 }, { "epoch": 3.84, "learning_rate": 3.7191310585100936e-05, "loss": 2.8799, "step": 150000 }, { "epoch": 3.85, "learning_rate": 3.718277145882433e-05, "loss": 2.9987, "step": 150100 }, { "epoch": 3.85, "learning_rate": 3.717423233254774e-05, "loss": 3.0406, "step": 150200 }, { "epoch": 3.85, "learning_rate": 3.7165693206271133e-05, "loss": 2.8638, "step": 150300 }, { "epoch": 3.85, "learning_rate": 3.7157154079994535e-05, "loss": 3.0982, "step": 150400 }, { "epoch": 3.86, "learning_rate": 3.7148614953717937e-05, "loss": 2.8802, "step": 150500 }, { "epoch": 3.86, "learning_rate": 3.714007582744134e-05, "loss": 2.8833, "step": 150600 }, { "epoch": 3.86, "learning_rate": 3.713153670116474e-05, "loss": 2.9561, "step": 150700 }, { "epoch": 3.86, "learning_rate": 3.712299757488814e-05, "loss": 2.853, "step": 150800 }, { "epoch": 3.87, "learning_rate": 3.7114458448611536e-05, "loss": 3.0224, "step": 150900 }, { "epoch": 3.87, "learning_rate": 3.7105919322334944e-05, "loss": 3.076, "step": 151000 }, { "epoch": 3.87, "learning_rate": 3.709738019605834e-05, "loss": 2.7872, "step": 151100 }, { "epoch": 3.87, "learning_rate": 3.708884106978174e-05, "loss": 2.9373, "step": 151200 }, { "epoch": 3.88, "learning_rate": 3.708030194350514e-05, "loss": 3.0687, "step": 151300 }, { "epoch": 3.88, "learning_rate": 3.7071762817228544e-05, "loss": 2.9908, "step": 151400 }, { "epoch": 3.88, "learning_rate": 3.7063223690951945e-05, "loss": 3.005, "step": 151500 }, { "epoch": 3.88, "learning_rate": 3.705468456467535e-05, "loss": 2.8163, "step": 151600 }, { "epoch": 3.89, "learning_rate": 3.704614543839874e-05, "loss": 2.804, "step": 151700 }, { "epoch": 3.89, "learning_rate": 3.703760631212215e-05, "loss": 2.8771, "step": 151800 }, { "epoch": 3.89, "learning_rate": 3.7029067185845545e-05, "loss": 2.7535, "step": 151900 }, { "epoch": 3.89, "learning_rate": 3.7020528059568946e-05, "loss": 3.0275, "step": 152000 }, { "epoch": 3.9, "learning_rate": 3.701198893329235e-05, "loss": 3.0363, "step": 152100 }, { "epoch": 3.9, "learning_rate": 3.700344980701575e-05, "loss": 2.7471, "step": 152200 }, { "epoch": 3.9, "learning_rate": 3.699491068073915e-05, "loss": 2.9191, "step": 152300 }, { "epoch": 3.9, "learning_rate": 3.698637155446255e-05, "loss": 3.0247, "step": 152400 }, { "epoch": 3.91, "learning_rate": 3.697783242818595e-05, "loss": 2.8533, "step": 152500 }, { "epoch": 3.91, "learning_rate": 3.6969293301909356e-05, "loss": 2.8196, "step": 152600 }, { "epoch": 3.91, "learning_rate": 3.696075417563275e-05, "loss": 2.9538, "step": 152700 }, { "epoch": 3.91, "learning_rate": 3.695221504935615e-05, "loss": 2.973, "step": 152800 }, { "epoch": 3.92, "learning_rate": 3.694367592307955e-05, "loss": 2.8075, "step": 152900 }, { "epoch": 3.92, "learning_rate": 3.6935136796802955e-05, "loss": 2.8664, "step": 153000 }, { "epoch": 3.92, "learning_rate": 3.692659767052635e-05, "loss": 3.0199, "step": 153100 }, { "epoch": 3.92, "learning_rate": 3.691805854424976e-05, "loss": 2.7264, "step": 153200 }, { "epoch": 3.93, "learning_rate": 3.690951941797315e-05, "loss": 2.7767, "step": 153300 }, { "epoch": 3.93, "learning_rate": 3.6900980291696554e-05, "loss": 2.8702, "step": 153400 }, { "epoch": 3.93, "learning_rate": 3.6892441165419956e-05, "loss": 2.8619, "step": 153500 }, { "epoch": 3.93, "learning_rate": 3.688390203914336e-05, "loss": 2.7582, "step": 153600 }, { "epoch": 3.94, "learning_rate": 3.687536291286676e-05, "loss": 2.8249, "step": 153700 }, { "epoch": 3.94, "learning_rate": 3.686682378659016e-05, "loss": 2.9421, "step": 153800 }, { "epoch": 3.94, "learning_rate": 3.6858284660313555e-05, "loss": 2.9268, "step": 153900 }, { "epoch": 3.95, "learning_rate": 3.6849745534036964e-05, "loss": 2.9982, "step": 154000 }, { "epoch": 3.95, "learning_rate": 3.684120640776036e-05, "loss": 2.8282, "step": 154100 }, { "epoch": 3.95, "learning_rate": 3.683266728148376e-05, "loss": 2.6538, "step": 154200 }, { "epoch": 3.95, "learning_rate": 3.682412815520716e-05, "loss": 2.9332, "step": 154300 }, { "epoch": 3.96, "learning_rate": 3.681558902893056e-05, "loss": 2.9436, "step": 154400 }, { "epoch": 3.96, "learning_rate": 3.6807049902653965e-05, "loss": 2.9657, "step": 154500 }, { "epoch": 3.96, "learning_rate": 3.6798510776377366e-05, "loss": 2.8455, "step": 154600 }, { "epoch": 3.96, "learning_rate": 3.678997165010076e-05, "loss": 3.0626, "step": 154700 }, { "epoch": 3.97, "learning_rate": 3.678143252382417e-05, "loss": 3.0584, "step": 154800 }, { "epoch": 3.97, "learning_rate": 3.6772893397547564e-05, "loss": 3.0656, "step": 154900 }, { "epoch": 3.97, "learning_rate": 3.6764354271270966e-05, "loss": 2.9965, "step": 155000 }, { "epoch": 3.97, "learning_rate": 3.675581514499437e-05, "loss": 3.1522, "step": 155100 }, { "epoch": 3.98, "learning_rate": 3.674727601871777e-05, "loss": 2.9915, "step": 155200 }, { "epoch": 3.98, "learning_rate": 3.6738736892441163e-05, "loss": 2.927, "step": 155300 }, { "epoch": 3.98, "learning_rate": 3.673019776616457e-05, "loss": 3.0846, "step": 155400 }, { "epoch": 3.98, "learning_rate": 3.6721658639887967e-05, "loss": 2.9297, "step": 155500 }, { "epoch": 3.99, "learning_rate": 3.6713119513611375e-05, "loss": 2.8472, "step": 155600 }, { "epoch": 3.99, "learning_rate": 3.670458038733477e-05, "loss": 2.7157, "step": 155700 }, { "epoch": 3.99, "learning_rate": 3.669604126105817e-05, "loss": 2.8304, "step": 155800 }, { "epoch": 3.99, "learning_rate": 3.668750213478157e-05, "loss": 2.9214, "step": 155900 }, { "epoch": 4.0, "learning_rate": 3.6678963008504974e-05, "loss": 2.8294, "step": 156000 }, { "epoch": 4.0, "learning_rate": 3.667042388222837e-05, "loss": 2.7344, "step": 156100 }, { "epoch": 4.0, "learning_rate": 3.666188475595178e-05, "loss": 2.9135, "step": 156200 }, { "epoch": 4.0, "learning_rate": 3.665334562967517e-05, "loss": 2.7938, "step": 156300 }, { "epoch": 4.01, "learning_rate": 3.6644806503398574e-05, "loss": 2.7882, "step": 156400 }, { "epoch": 4.01, "learning_rate": 3.6636267377121975e-05, "loss": 2.7811, "step": 156500 }, { "epoch": 4.01, "learning_rate": 3.662772825084538e-05, "loss": 2.8213, "step": 156600 }, { "epoch": 4.01, "learning_rate": 3.661918912456878e-05, "loss": 2.8907, "step": 156700 }, { "epoch": 4.02, "learning_rate": 3.661064999829218e-05, "loss": 2.8295, "step": 156800 }, { "epoch": 4.02, "learning_rate": 3.6602110872015575e-05, "loss": 2.7955, "step": 156900 }, { "epoch": 4.02, "learning_rate": 3.659357174573898e-05, "loss": 2.9084, "step": 157000 }, { "epoch": 4.02, "learning_rate": 3.658503261946238e-05, "loss": 2.7326, "step": 157100 }, { "epoch": 4.03, "learning_rate": 3.657649349318578e-05, "loss": 2.8411, "step": 157200 }, { "epoch": 4.03, "learning_rate": 3.656795436690918e-05, "loss": 2.9072, "step": 157300 }, { "epoch": 4.03, "learning_rate": 3.655941524063258e-05, "loss": 2.7981, "step": 157400 }, { "epoch": 4.03, "learning_rate": 3.6550876114355984e-05, "loss": 2.7779, "step": 157500 }, { "epoch": 4.04, "learning_rate": 3.6542336988079385e-05, "loss": 3.0451, "step": 157600 }, { "epoch": 4.04, "learning_rate": 3.653379786180278e-05, "loss": 3.1684, "step": 157700 }, { "epoch": 4.04, "learning_rate": 3.652525873552618e-05, "loss": 2.9367, "step": 157800 }, { "epoch": 4.04, "learning_rate": 3.651671960924958e-05, "loss": 2.8674, "step": 157900 }, { "epoch": 4.05, "learning_rate": 3.650818048297298e-05, "loss": 2.9975, "step": 158000 }, { "epoch": 4.05, "learning_rate": 3.6499641356696386e-05, "loss": 3.1238, "step": 158100 }, { "epoch": 4.05, "learning_rate": 3.649110223041978e-05, "loss": 2.8777, "step": 158200 }, { "epoch": 4.06, "learning_rate": 3.648256310414318e-05, "loss": 2.7382, "step": 158300 }, { "epoch": 4.06, "learning_rate": 3.6474023977866584e-05, "loss": 3.0051, "step": 158400 }, { "epoch": 4.06, "learning_rate": 3.6465484851589986e-05, "loss": 2.9901, "step": 158500 }, { "epoch": 4.06, "learning_rate": 3.645694572531339e-05, "loss": 2.9462, "step": 158600 }, { "epoch": 4.07, "learning_rate": 3.644840659903679e-05, "loss": 3.0967, "step": 158700 }, { "epoch": 4.07, "learning_rate": 3.6439867472760184e-05, "loss": 2.8192, "step": 158800 }, { "epoch": 4.07, "learning_rate": 3.643132834648359e-05, "loss": 3.0004, "step": 158900 }, { "epoch": 4.07, "learning_rate": 3.642278922020699e-05, "loss": 2.8679, "step": 159000 }, { "epoch": 4.08, "learning_rate": 3.641425009393039e-05, "loss": 2.6969, "step": 159100 }, { "epoch": 4.08, "learning_rate": 3.640571096765379e-05, "loss": 2.8129, "step": 159200 }, { "epoch": 4.08, "learning_rate": 3.639717184137719e-05, "loss": 2.9315, "step": 159300 }, { "epoch": 4.08, "learning_rate": 3.638863271510059e-05, "loss": 2.8286, "step": 159400 }, { "epoch": 4.09, "learning_rate": 3.6380093588823995e-05, "loss": 2.8636, "step": 159500 }, { "epoch": 4.09, "learning_rate": 3.637155446254739e-05, "loss": 2.8053, "step": 159600 }, { "epoch": 4.09, "learning_rate": 3.63630153362708e-05, "loss": 2.8558, "step": 159700 }, { "epoch": 4.09, "learning_rate": 3.635447620999419e-05, "loss": 2.8891, "step": 159800 }, { "epoch": 4.1, "learning_rate": 3.6345937083717594e-05, "loss": 2.8303, "step": 159900 }, { "epoch": 4.1, "learning_rate": 3.6337397957440996e-05, "loss": 2.8755, "step": 160000 }, { "epoch": 4.1, "learning_rate": 3.63288588311644e-05, "loss": 2.9814, "step": 160100 }, { "epoch": 4.1, "learning_rate": 3.632031970488779e-05, "loss": 2.9474, "step": 160200 }, { "epoch": 4.11, "learning_rate": 3.63117805786112e-05, "loss": 2.7752, "step": 160300 }, { "epoch": 4.11, "learning_rate": 3.6303241452334595e-05, "loss": 2.9008, "step": 160400 }, { "epoch": 4.11, "learning_rate": 3.6294702326058e-05, "loss": 2.8035, "step": 160500 }, { "epoch": 4.11, "learning_rate": 3.62861631997814e-05, "loss": 2.899, "step": 160600 }, { "epoch": 4.12, "learning_rate": 3.62776240735048e-05, "loss": 3.0576, "step": 160700 }, { "epoch": 4.12, "learning_rate": 3.62690849472282e-05, "loss": 2.9951, "step": 160800 }, { "epoch": 4.12, "learning_rate": 3.62605458209516e-05, "loss": 2.7915, "step": 160900 }, { "epoch": 4.12, "learning_rate": 3.6252006694675e-05, "loss": 3.0475, "step": 161000 }, { "epoch": 4.13, "learning_rate": 3.6243467568398406e-05, "loss": 2.8174, "step": 161100 }, { "epoch": 4.13, "learning_rate": 3.62349284421218e-05, "loss": 2.8561, "step": 161200 }, { "epoch": 4.13, "learning_rate": 3.62263893158452e-05, "loss": 2.9166, "step": 161300 }, { "epoch": 4.13, "learning_rate": 3.6217850189568604e-05, "loss": 2.8769, "step": 161400 }, { "epoch": 4.14, "learning_rate": 3.6209311063292005e-05, "loss": 2.9089, "step": 161500 }, { "epoch": 4.14, "learning_rate": 3.620077193701541e-05, "loss": 2.9342, "step": 161600 }, { "epoch": 4.14, "learning_rate": 3.619223281073881e-05, "loss": 2.7861, "step": 161700 }, { "epoch": 4.14, "learning_rate": 3.61836936844622e-05, "loss": 2.9156, "step": 161800 }, { "epoch": 4.15, "learning_rate": 3.617515455818561e-05, "loss": 2.8686, "step": 161900 }, { "epoch": 4.15, "learning_rate": 3.6166615431909006e-05, "loss": 2.8429, "step": 162000 }, { "epoch": 4.15, "learning_rate": 3.615807630563241e-05, "loss": 2.917, "step": 162100 }, { "epoch": 4.16, "learning_rate": 3.614953717935581e-05, "loss": 2.9653, "step": 162200 }, { "epoch": 4.16, "learning_rate": 3.614099805307921e-05, "loss": 2.9225, "step": 162300 }, { "epoch": 4.16, "learning_rate": 3.613245892680261e-05, "loss": 2.9669, "step": 162400 }, { "epoch": 4.16, "learning_rate": 3.6123919800526014e-05, "loss": 2.9289, "step": 162500 }, { "epoch": 4.17, "learning_rate": 3.611538067424941e-05, "loss": 2.8101, "step": 162600 }, { "epoch": 4.17, "learning_rate": 3.610684154797282e-05, "loss": 2.7668, "step": 162700 }, { "epoch": 4.17, "learning_rate": 3.609830242169621e-05, "loss": 2.6915, "step": 162800 }, { "epoch": 4.17, "learning_rate": 3.608976329541961e-05, "loss": 2.8424, "step": 162900 }, { "epoch": 4.18, "learning_rate": 3.6081224169143015e-05, "loss": 2.9561, "step": 163000 }, { "epoch": 4.18, "learning_rate": 3.6072685042866416e-05, "loss": 2.8571, "step": 163100 }, { "epoch": 4.18, "learning_rate": 3.606414591658981e-05, "loss": 3.0197, "step": 163200 }, { "epoch": 4.18, "learning_rate": 3.605560679031322e-05, "loss": 2.8519, "step": 163300 }, { "epoch": 4.19, "learning_rate": 3.6047067664036614e-05, "loss": 2.6463, "step": 163400 }, { "epoch": 4.19, "learning_rate": 3.6038528537760016e-05, "loss": 2.6629, "step": 163500 }, { "epoch": 4.19, "learning_rate": 3.602998941148342e-05, "loss": 2.8949, "step": 163600 }, { "epoch": 4.19, "learning_rate": 3.602145028520682e-05, "loss": 2.709, "step": 163700 }, { "epoch": 4.2, "learning_rate": 3.601291115893022e-05, "loss": 2.7052, "step": 163800 }, { "epoch": 4.2, "learning_rate": 3.600437203265362e-05, "loss": 2.7292, "step": 163900 }, { "epoch": 4.2, "learning_rate": 3.599583290637702e-05, "loss": 2.7062, "step": 164000 }, { "epoch": 4.2, "learning_rate": 3.5987293780100425e-05, "loss": 2.8184, "step": 164100 }, { "epoch": 4.21, "learning_rate": 3.597875465382382e-05, "loss": 2.8715, "step": 164200 }, { "epoch": 4.21, "learning_rate": 3.597021552754722e-05, "loss": 2.9527, "step": 164300 }, { "epoch": 4.21, "learning_rate": 3.596167640127062e-05, "loss": 2.9549, "step": 164400 }, { "epoch": 4.21, "learning_rate": 3.5953137274994025e-05, "loss": 2.7693, "step": 164500 }, { "epoch": 4.22, "learning_rate": 3.5944598148717426e-05, "loss": 2.847, "step": 164600 }, { "epoch": 4.22, "learning_rate": 3.593605902244083e-05, "loss": 2.9589, "step": 164700 }, { "epoch": 4.22, "learning_rate": 3.592751989616422e-05, "loss": 2.7985, "step": 164800 }, { "epoch": 4.22, "learning_rate": 3.591898076988763e-05, "loss": 2.7499, "step": 164900 }, { "epoch": 4.23, "learning_rate": 3.5910441643611026e-05, "loss": 2.8699, "step": 165000 }, { "epoch": 4.23, "learning_rate": 3.590190251733443e-05, "loss": 2.9339, "step": 165100 }, { "epoch": 4.23, "learning_rate": 3.589336339105783e-05, "loss": 2.7762, "step": 165200 }, { "epoch": 4.23, "learning_rate": 3.588482426478123e-05, "loss": 2.6134, "step": 165300 }, { "epoch": 4.24, "learning_rate": 3.587628513850463e-05, "loss": 2.8161, "step": 165400 }, { "epoch": 4.24, "learning_rate": 3.586774601222803e-05, "loss": 2.9087, "step": 165500 }, { "epoch": 4.24, "learning_rate": 3.585920688595143e-05, "loss": 2.7699, "step": 165600 }, { "epoch": 4.24, "learning_rate": 3.5850667759674836e-05, "loss": 2.9153, "step": 165700 }, { "epoch": 4.25, "learning_rate": 3.584212863339823e-05, "loss": 2.9153, "step": 165800 }, { "epoch": 4.25, "learning_rate": 3.583358950712163e-05, "loss": 2.8115, "step": 165900 }, { "epoch": 4.25, "learning_rate": 3.5825050380845034e-05, "loss": 2.6892, "step": 166000 }, { "epoch": 4.26, "learning_rate": 3.5816511254568436e-05, "loss": 2.7988, "step": 166100 }, { "epoch": 4.26, "learning_rate": 3.580797212829183e-05, "loss": 2.9257, "step": 166200 }, { "epoch": 4.26, "learning_rate": 3.579943300201524e-05, "loss": 2.8666, "step": 166300 }, { "epoch": 4.26, "learning_rate": 3.5790893875738634e-05, "loss": 2.7994, "step": 166400 }, { "epoch": 4.27, "learning_rate": 3.5782354749462035e-05, "loss": 2.9492, "step": 166500 }, { "epoch": 4.27, "learning_rate": 3.577381562318544e-05, "loss": 2.8635, "step": 166600 }, { "epoch": 4.27, "learning_rate": 3.576527649690884e-05, "loss": 2.868, "step": 166700 }, { "epoch": 4.27, "learning_rate": 3.575673737063224e-05, "loss": 2.887, "step": 166800 }, { "epoch": 4.28, "learning_rate": 3.574819824435564e-05, "loss": 2.8005, "step": 166900 }, { "epoch": 4.28, "learning_rate": 3.5739659118079036e-05, "loss": 2.905, "step": 167000 }, { "epoch": 4.28, "learning_rate": 3.5731119991802444e-05, "loss": 2.7722, "step": 167100 }, { "epoch": 4.28, "learning_rate": 3.572258086552584e-05, "loss": 2.836, "step": 167200 }, { "epoch": 4.29, "learning_rate": 3.571404173924924e-05, "loss": 2.9579, "step": 167300 }, { "epoch": 4.29, "learning_rate": 3.570550261297264e-05, "loss": 2.7944, "step": 167400 }, { "epoch": 4.29, "learning_rate": 3.5696963486696044e-05, "loss": 2.7011, "step": 167500 }, { "epoch": 4.29, "learning_rate": 3.5688424360419445e-05, "loss": 2.8905, "step": 167600 }, { "epoch": 4.3, "learning_rate": 3.567988523414285e-05, "loss": 2.9255, "step": 167700 }, { "epoch": 4.3, "learning_rate": 3.567134610786624e-05, "loss": 2.919, "step": 167800 }, { "epoch": 4.3, "learning_rate": 3.566280698158965e-05, "loss": 2.9211, "step": 167900 }, { "epoch": 4.3, "learning_rate": 3.5654267855313045e-05, "loss": 2.7088, "step": 168000 }, { "epoch": 4.31, "learning_rate": 3.5645728729036446e-05, "loss": 2.6552, "step": 168100 }, { "epoch": 4.31, "learning_rate": 3.563718960275985e-05, "loss": 2.9929, "step": 168200 }, { "epoch": 4.31, "learning_rate": 3.562865047648325e-05, "loss": 2.8072, "step": 168300 }, { "epoch": 4.31, "learning_rate": 3.5620111350206644e-05, "loss": 2.9567, "step": 168400 }, { "epoch": 4.32, "learning_rate": 3.561157222393005e-05, "loss": 2.8903, "step": 168500 }, { "epoch": 4.32, "learning_rate": 3.560303309765345e-05, "loss": 2.8801, "step": 168600 }, { "epoch": 4.32, "learning_rate": 3.559449397137685e-05, "loss": 2.749, "step": 168700 }, { "epoch": 4.32, "learning_rate": 3.558595484510025e-05, "loss": 2.8069, "step": 168800 }, { "epoch": 4.33, "learning_rate": 3.557741571882365e-05, "loss": 2.8033, "step": 168900 }, { "epoch": 4.33, "learning_rate": 3.5568876592547054e-05, "loss": 2.6476, "step": 169000 }, { "epoch": 4.33, "learning_rate": 3.5560337466270455e-05, "loss": 3.0062, "step": 169100 }, { "epoch": 4.33, "learning_rate": 3.555179833999385e-05, "loss": 2.9767, "step": 169200 }, { "epoch": 4.34, "learning_rate": 3.554325921371726e-05, "loss": 2.8588, "step": 169300 }, { "epoch": 4.34, "learning_rate": 3.553472008744065e-05, "loss": 2.8502, "step": 169400 }, { "epoch": 4.34, "learning_rate": 3.5526180961164055e-05, "loss": 3.108, "step": 169500 }, { "epoch": 4.34, "learning_rate": 3.5517641834887456e-05, "loss": 2.7552, "step": 169600 }, { "epoch": 4.35, "learning_rate": 3.550910270861086e-05, "loss": 3.0094, "step": 169700 }, { "epoch": 4.35, "learning_rate": 3.550056358233426e-05, "loss": 2.8699, "step": 169800 }, { "epoch": 4.35, "learning_rate": 3.549202445605766e-05, "loss": 2.8333, "step": 169900 }, { "epoch": 4.35, "learning_rate": 3.5483485329781055e-05, "loss": 2.8573, "step": 170000 }, { "epoch": 4.36, "learning_rate": 3.5474946203504464e-05, "loss": 2.8919, "step": 170100 }, { "epoch": 4.36, "learning_rate": 3.546640707722786e-05, "loss": 3.0407, "step": 170200 }, { "epoch": 4.36, "learning_rate": 3.545786795095126e-05, "loss": 2.8344, "step": 170300 }, { "epoch": 4.37, "learning_rate": 3.544932882467466e-05, "loss": 2.8432, "step": 170400 }, { "epoch": 4.37, "learning_rate": 3.544078969839806e-05, "loss": 2.646, "step": 170500 }, { "epoch": 4.37, "learning_rate": 3.5432250572121465e-05, "loss": 2.7221, "step": 170600 }, { "epoch": 4.37, "learning_rate": 3.5423711445844866e-05, "loss": 2.8708, "step": 170700 }, { "epoch": 4.38, "learning_rate": 3.541517231956826e-05, "loss": 2.8989, "step": 170800 }, { "epoch": 4.38, "learning_rate": 3.540663319329167e-05, "loss": 2.9317, "step": 170900 }, { "epoch": 4.38, "learning_rate": 3.5398094067015064e-05, "loss": 2.7789, "step": 171000 }, { "epoch": 4.38, "learning_rate": 3.5389554940738466e-05, "loss": 2.9044, "step": 171100 }, { "epoch": 4.39, "learning_rate": 3.538101581446187e-05, "loss": 2.8208, "step": 171200 }, { "epoch": 4.39, "learning_rate": 3.537247668818527e-05, "loss": 2.7788, "step": 171300 }, { "epoch": 4.39, "learning_rate": 3.5363937561908664e-05, "loss": 2.7944, "step": 171400 }, { "epoch": 4.39, "learning_rate": 3.535539843563207e-05, "loss": 2.837, "step": 171500 }, { "epoch": 4.4, "learning_rate": 3.534685930935547e-05, "loss": 2.9127, "step": 171600 }, { "epoch": 4.4, "learning_rate": 3.533832018307887e-05, "loss": 2.847, "step": 171700 }, { "epoch": 4.4, "learning_rate": 3.532978105680227e-05, "loss": 2.853, "step": 171800 }, { "epoch": 4.4, "learning_rate": 3.532124193052567e-05, "loss": 2.6868, "step": 171900 }, { "epoch": 4.41, "learning_rate": 3.531270280424907e-05, "loss": 3.0012, "step": 172000 }, { "epoch": 4.41, "learning_rate": 3.5304163677972474e-05, "loss": 3.0031, "step": 172100 }, { "epoch": 4.41, "learning_rate": 3.529562455169587e-05, "loss": 2.8709, "step": 172200 }, { "epoch": 4.41, "learning_rate": 3.528708542541928e-05, "loss": 2.707, "step": 172300 }, { "epoch": 4.42, "learning_rate": 3.527854629914267e-05, "loss": 2.6456, "step": 172400 }, { "epoch": 4.42, "learning_rate": 3.5270007172866074e-05, "loss": 2.6917, "step": 172500 }, { "epoch": 4.42, "learning_rate": 3.5261468046589475e-05, "loss": 2.7597, "step": 172600 }, { "epoch": 4.42, "learning_rate": 3.525292892031288e-05, "loss": 2.5973, "step": 172700 }, { "epoch": 4.43, "learning_rate": 3.524438979403628e-05, "loss": 2.7756, "step": 172800 }, { "epoch": 4.43, "learning_rate": 3.523585066775968e-05, "loss": 2.8136, "step": 172900 }, { "epoch": 4.43, "learning_rate": 3.5227311541483075e-05, "loss": 2.7833, "step": 173000 }, { "epoch": 4.43, "learning_rate": 3.521877241520648e-05, "loss": 2.7723, "step": 173100 }, { "epoch": 4.44, "learning_rate": 3.521023328892988e-05, "loss": 2.8283, "step": 173200 }, { "epoch": 4.44, "learning_rate": 3.520169416265328e-05, "loss": 3.0486, "step": 173300 }, { "epoch": 4.44, "learning_rate": 3.519315503637668e-05, "loss": 2.9591, "step": 173400 }, { "epoch": 4.44, "learning_rate": 3.518461591010008e-05, "loss": 2.7472, "step": 173500 }, { "epoch": 4.45, "learning_rate": 3.517607678382348e-05, "loss": 2.8585, "step": 173600 }, { "epoch": 4.45, "learning_rate": 3.5167537657546886e-05, "loss": 2.9055, "step": 173700 }, { "epoch": 4.45, "learning_rate": 3.515899853127028e-05, "loss": 2.8869, "step": 173800 }, { "epoch": 4.45, "learning_rate": 3.515045940499368e-05, "loss": 2.9449, "step": 173900 }, { "epoch": 4.46, "learning_rate": 3.5141920278717084e-05, "loss": 2.7024, "step": 174000 }, { "epoch": 4.46, "learning_rate": 3.5133381152440485e-05, "loss": 2.931, "step": 174100 }, { "epoch": 4.46, "learning_rate": 3.512484202616389e-05, "loss": 2.8525, "step": 174200 }, { "epoch": 4.47, "learning_rate": 3.511630289988729e-05, "loss": 2.9064, "step": 174300 }, { "epoch": 4.47, "learning_rate": 3.510776377361068e-05, "loss": 2.6678, "step": 174400 }, { "epoch": 4.47, "learning_rate": 3.509922464733409e-05, "loss": 2.9454, "step": 174500 }, { "epoch": 4.47, "learning_rate": 3.5090685521057486e-05, "loss": 2.7788, "step": 174600 }, { "epoch": 4.48, "learning_rate": 3.508214639478089e-05, "loss": 2.949, "step": 174700 }, { "epoch": 4.48, "learning_rate": 3.507360726850429e-05, "loss": 2.8295, "step": 174800 }, { "epoch": 4.48, "learning_rate": 3.506506814222769e-05, "loss": 2.8091, "step": 174900 }, { "epoch": 4.48, "learning_rate": 3.505652901595109e-05, "loss": 2.8725, "step": 175000 }, { "epoch": 4.49, "learning_rate": 3.5047989889674494e-05, "loss": 2.9462, "step": 175100 }, { "epoch": 4.49, "learning_rate": 3.503945076339789e-05, "loss": 2.9779, "step": 175200 }, { "epoch": 4.49, "learning_rate": 3.50309116371213e-05, "loss": 2.9337, "step": 175300 }, { "epoch": 4.49, "learning_rate": 3.502237251084469e-05, "loss": 2.7945, "step": 175400 }, { "epoch": 4.5, "learning_rate": 3.501383338456809e-05, "loss": 2.8266, "step": 175500 }, { "epoch": 4.5, "learning_rate": 3.5005294258291495e-05, "loss": 2.7534, "step": 175600 }, { "epoch": 4.5, "learning_rate": 3.499675513201489e-05, "loss": 2.9093, "step": 175700 }, { "epoch": 4.5, "learning_rate": 3.49882160057383e-05, "loss": 2.8192, "step": 175800 }, { "epoch": 4.51, "learning_rate": 3.497967687946169e-05, "loss": 2.7516, "step": 175900 }, { "epoch": 4.51, "learning_rate": 3.4971137753185094e-05, "loss": 2.8898, "step": 176000 }, { "epoch": 4.51, "learning_rate": 3.4962598626908496e-05, "loss": 2.9933, "step": 176100 }, { "epoch": 4.51, "learning_rate": 3.49540595006319e-05, "loss": 3.0046, "step": 176200 }, { "epoch": 4.52, "learning_rate": 3.494552037435529e-05, "loss": 2.8737, "step": 176300 }, { "epoch": 4.52, "learning_rate": 3.49369812480787e-05, "loss": 2.9585, "step": 176400 }, { "epoch": 4.52, "learning_rate": 3.4928442121802095e-05, "loss": 2.7224, "step": 176500 }, { "epoch": 4.52, "learning_rate": 3.49199029955255e-05, "loss": 2.9317, "step": 176600 }, { "epoch": 4.53, "learning_rate": 3.49113638692489e-05, "loss": 2.896, "step": 176700 }, { "epoch": 4.53, "learning_rate": 3.49028247429723e-05, "loss": 2.7501, "step": 176800 }, { "epoch": 4.53, "learning_rate": 3.48942856166957e-05, "loss": 2.8642, "step": 176900 }, { "epoch": 4.53, "learning_rate": 3.48857464904191e-05, "loss": 3.2009, "step": 177000 }, { "epoch": 4.54, "learning_rate": 3.48772073641425e-05, "loss": 2.8458, "step": 177100 }, { "epoch": 4.54, "learning_rate": 3.4868668237865906e-05, "loss": 2.7384, "step": 177200 }, { "epoch": 4.54, "learning_rate": 3.48601291115893e-05, "loss": 2.9411, "step": 177300 }, { "epoch": 4.54, "learning_rate": 3.48515899853127e-05, "loss": 2.8195, "step": 177400 }, { "epoch": 4.55, "learning_rate": 3.4843050859036104e-05, "loss": 2.8032, "step": 177500 }, { "epoch": 4.55, "learning_rate": 3.4834511732759505e-05, "loss": 2.8678, "step": 177600 }, { "epoch": 4.55, "learning_rate": 3.482597260648291e-05, "loss": 2.7897, "step": 177700 }, { "epoch": 4.55, "learning_rate": 3.481743348020631e-05, "loss": 2.8494, "step": 177800 }, { "epoch": 4.56, "learning_rate": 3.48088943539297e-05, "loss": 2.9871, "step": 177900 }, { "epoch": 4.56, "learning_rate": 3.480035522765311e-05, "loss": 2.8696, "step": 178000 }, { "epoch": 4.56, "learning_rate": 3.4791816101376506e-05, "loss": 2.8582, "step": 178100 }, { "epoch": 4.57, "learning_rate": 3.478327697509991e-05, "loss": 2.8457, "step": 178200 }, { "epoch": 4.57, "learning_rate": 3.477473784882331e-05, "loss": 2.8258, "step": 178300 }, { "epoch": 4.57, "learning_rate": 3.476619872254671e-05, "loss": 2.6707, "step": 178400 }, { "epoch": 4.57, "learning_rate": 3.4757659596270106e-05, "loss": 2.8959, "step": 178500 }, { "epoch": 4.58, "learning_rate": 3.4749120469993514e-05, "loss": 2.7047, "step": 178600 }, { "epoch": 4.58, "learning_rate": 3.474058134371691e-05, "loss": 2.8022, "step": 178700 }, { "epoch": 4.58, "learning_rate": 3.473204221744031e-05, "loss": 2.9514, "step": 178800 }, { "epoch": 4.58, "learning_rate": 3.472350309116371e-05, "loss": 2.7032, "step": 178900 }, { "epoch": 4.59, "learning_rate": 3.4714963964887114e-05, "loss": 2.7697, "step": 179000 }, { "epoch": 4.59, "learning_rate": 3.4706424838610515e-05, "loss": 2.6671, "step": 179100 }, { "epoch": 4.59, "learning_rate": 3.4697885712333917e-05, "loss": 2.9532, "step": 179200 }, { "epoch": 4.59, "learning_rate": 3.468934658605731e-05, "loss": 2.895, "step": 179300 }, { "epoch": 4.6, "learning_rate": 3.468080745978072e-05, "loss": 2.6902, "step": 179400 }, { "epoch": 4.6, "learning_rate": 3.4672268333504114e-05, "loss": 2.9458, "step": 179500 }, { "epoch": 4.6, "learning_rate": 3.4663729207227516e-05, "loss": 2.8755, "step": 179600 }, { "epoch": 4.6, "learning_rate": 3.465519008095092e-05, "loss": 2.7573, "step": 179700 }, { "epoch": 4.61, "learning_rate": 3.464665095467432e-05, "loss": 2.971, "step": 179800 }, { "epoch": 4.61, "learning_rate": 3.463811182839772e-05, "loss": 2.7879, "step": 179900 }, { "epoch": 4.61, "learning_rate": 3.462957270212112e-05, "loss": 2.742, "step": 180000 }, { "epoch": 4.61, "learning_rate": 3.462103357584452e-05, "loss": 2.8963, "step": 180100 }, { "epoch": 4.62, "learning_rate": 3.4612494449567925e-05, "loss": 2.8663, "step": 180200 }, { "epoch": 4.62, "learning_rate": 3.460395532329132e-05, "loss": 2.8793, "step": 180300 }, { "epoch": 4.62, "learning_rate": 3.459541619701472e-05, "loss": 2.8008, "step": 180400 }, { "epoch": 4.62, "learning_rate": 3.458687707073812e-05, "loss": 2.766, "step": 180500 }, { "epoch": 4.63, "learning_rate": 3.4578337944461525e-05, "loss": 2.9717, "step": 180600 }, { "epoch": 4.63, "learning_rate": 3.4569798818184926e-05, "loss": 2.7027, "step": 180700 }, { "epoch": 4.63, "learning_rate": 3.456125969190833e-05, "loss": 2.6709, "step": 180800 }, { "epoch": 4.63, "learning_rate": 3.455272056563172e-05, "loss": 2.7249, "step": 180900 }, { "epoch": 4.64, "learning_rate": 3.454418143935513e-05, "loss": 2.7697, "step": 181000 }, { "epoch": 4.64, "learning_rate": 3.4535642313078526e-05, "loss": 2.771, "step": 181100 }, { "epoch": 4.64, "learning_rate": 3.452710318680193e-05, "loss": 2.6931, "step": 181200 }, { "epoch": 4.64, "learning_rate": 3.451856406052533e-05, "loss": 2.8809, "step": 181300 }, { "epoch": 4.65, "learning_rate": 3.451002493424873e-05, "loss": 2.9175, "step": 181400 }, { "epoch": 4.65, "learning_rate": 3.4501485807972125e-05, "loss": 2.838, "step": 181500 }, { "epoch": 4.65, "learning_rate": 3.4492946681695533e-05, "loss": 2.7583, "step": 181600 }, { "epoch": 4.65, "learning_rate": 3.448440755541893e-05, "loss": 2.7249, "step": 181700 }, { "epoch": 4.66, "learning_rate": 3.447586842914233e-05, "loss": 2.9048, "step": 181800 }, { "epoch": 4.66, "learning_rate": 3.446732930286573e-05, "loss": 2.6823, "step": 181900 }, { "epoch": 4.66, "learning_rate": 3.445879017658913e-05, "loss": 2.8402, "step": 182000 }, { "epoch": 4.66, "learning_rate": 3.4450251050312534e-05, "loss": 2.8399, "step": 182100 }, { "epoch": 4.67, "learning_rate": 3.4441711924035936e-05, "loss": 2.8171, "step": 182200 }, { "epoch": 4.67, "learning_rate": 3.443317279775933e-05, "loss": 2.7929, "step": 182300 }, { "epoch": 4.67, "learning_rate": 3.442463367148274e-05, "loss": 2.8139, "step": 182400 }, { "epoch": 4.68, "learning_rate": 3.4416094545206134e-05, "loss": 2.6094, "step": 182500 }, { "epoch": 4.68, "learning_rate": 3.4407555418929535e-05, "loss": 2.8512, "step": 182600 }, { "epoch": 4.68, "learning_rate": 3.439901629265294e-05, "loss": 2.9651, "step": 182700 }, { "epoch": 4.68, "learning_rate": 3.439047716637634e-05, "loss": 2.8471, "step": 182800 }, { "epoch": 4.69, "learning_rate": 3.438193804009974e-05, "loss": 2.7212, "step": 182900 }, { "epoch": 4.69, "learning_rate": 3.437339891382314e-05, "loss": 2.7698, "step": 183000 }, { "epoch": 4.69, "learning_rate": 3.4364859787546536e-05, "loss": 2.7476, "step": 183100 }, { "epoch": 4.69, "learning_rate": 3.4356320661269945e-05, "loss": 3.0972, "step": 183200 }, { "epoch": 4.7, "learning_rate": 3.434778153499334e-05, "loss": 2.8557, "step": 183300 }, { "epoch": 4.7, "learning_rate": 3.433924240871674e-05, "loss": 2.9168, "step": 183400 }, { "epoch": 4.7, "learning_rate": 3.433070328244014e-05, "loss": 2.893, "step": 183500 }, { "epoch": 4.7, "learning_rate": 3.4322164156163544e-05, "loss": 2.8344, "step": 183600 }, { "epoch": 4.71, "learning_rate": 3.431362502988694e-05, "loss": 2.7075, "step": 183700 }, { "epoch": 4.71, "learning_rate": 3.430508590361035e-05, "loss": 2.7563, "step": 183800 }, { "epoch": 4.71, "learning_rate": 3.429654677733374e-05, "loss": 2.7989, "step": 183900 }, { "epoch": 4.71, "learning_rate": 3.4288007651057143e-05, "loss": 2.8689, "step": 184000 }, { "epoch": 4.72, "learning_rate": 3.4279468524780545e-05, "loss": 2.7793, "step": 184100 }, { "epoch": 4.72, "learning_rate": 3.4270929398503947e-05, "loss": 2.935, "step": 184200 }, { "epoch": 4.72, "learning_rate": 3.426239027222735e-05, "loss": 2.8373, "step": 184300 }, { "epoch": 4.72, "learning_rate": 3.425385114595075e-05, "loss": 2.7941, "step": 184400 }, { "epoch": 4.73, "learning_rate": 3.4245312019674144e-05, "loss": 2.8139, "step": 184500 }, { "epoch": 4.73, "learning_rate": 3.423677289339755e-05, "loss": 2.9945, "step": 184600 }, { "epoch": 4.73, "learning_rate": 3.422823376712095e-05, "loss": 2.8065, "step": 184700 }, { "epoch": 4.73, "learning_rate": 3.421969464084435e-05, "loss": 2.7373, "step": 184800 }, { "epoch": 4.74, "learning_rate": 3.421115551456775e-05, "loss": 2.9016, "step": 184900 }, { "epoch": 4.74, "learning_rate": 3.420261638829115e-05, "loss": 2.7924, "step": 185000 }, { "epoch": 4.74, "learning_rate": 3.4194077262014554e-05, "loss": 2.6757, "step": 185100 }, { "epoch": 4.74, "learning_rate": 3.4185538135737955e-05, "loss": 2.98, "step": 185200 }, { "epoch": 4.75, "learning_rate": 3.417699900946135e-05, "loss": 2.6377, "step": 185300 }, { "epoch": 4.75, "learning_rate": 3.416845988318476e-05, "loss": 2.7265, "step": 185400 }, { "epoch": 4.75, "learning_rate": 3.415992075690815e-05, "loss": 3.0404, "step": 185500 }, { "epoch": 4.75, "learning_rate": 3.4151381630631555e-05, "loss": 2.854, "step": 185600 }, { "epoch": 4.76, "learning_rate": 3.4142842504354956e-05, "loss": 2.8406, "step": 185700 }, { "epoch": 4.76, "learning_rate": 3.413430337807836e-05, "loss": 2.8111, "step": 185800 }, { "epoch": 4.76, "learning_rate": 3.412576425180176e-05, "loss": 2.713, "step": 185900 }, { "epoch": 4.76, "learning_rate": 3.411722512552516e-05, "loss": 2.9278, "step": 186000 }, { "epoch": 4.77, "learning_rate": 3.4108685999248556e-05, "loss": 2.7635, "step": 186100 }, { "epoch": 4.77, "learning_rate": 3.4100146872971964e-05, "loss": 2.7555, "step": 186200 }, { "epoch": 4.77, "learning_rate": 3.409160774669536e-05, "loss": 2.7749, "step": 186300 }, { "epoch": 4.78, "learning_rate": 3.408306862041876e-05, "loss": 2.7755, "step": 186400 }, { "epoch": 4.78, "learning_rate": 3.407452949414216e-05, "loss": 2.8913, "step": 186500 }, { "epoch": 4.78, "learning_rate": 3.4065990367865563e-05, "loss": 2.9017, "step": 186600 }, { "epoch": 4.78, "learning_rate": 3.405745124158896e-05, "loss": 2.6779, "step": 186700 }, { "epoch": 4.79, "learning_rate": 3.4048912115312367e-05, "loss": 2.8349, "step": 186800 }, { "epoch": 4.79, "learning_rate": 3.404037298903576e-05, "loss": 3.0744, "step": 186900 }, { "epoch": 4.79, "learning_rate": 3.403183386275916e-05, "loss": 2.8017, "step": 187000 }, { "epoch": 4.79, "learning_rate": 3.4023294736482564e-05, "loss": 2.6702, "step": 187100 }, { "epoch": 4.8, "learning_rate": 3.4014755610205966e-05, "loss": 2.7281, "step": 187200 }, { "epoch": 4.8, "learning_rate": 3.400621648392937e-05, "loss": 2.7488, "step": 187300 }, { "epoch": 4.8, "learning_rate": 3.399767735765277e-05, "loss": 2.7947, "step": 187400 }, { "epoch": 4.8, "learning_rate": 3.3989138231376164e-05, "loss": 2.8185, "step": 187500 }, { "epoch": 4.81, "learning_rate": 3.398059910509957e-05, "loss": 2.8457, "step": 187600 }, { "epoch": 4.81, "learning_rate": 3.397205997882297e-05, "loss": 2.9416, "step": 187700 }, { "epoch": 4.81, "learning_rate": 3.396352085254637e-05, "loss": 2.7752, "step": 187800 }, { "epoch": 4.81, "learning_rate": 3.395498172626977e-05, "loss": 2.6326, "step": 187900 }, { "epoch": 4.82, "learning_rate": 3.394644259999317e-05, "loss": 2.7806, "step": 188000 }, { "epoch": 4.82, "learning_rate": 3.393790347371657e-05, "loss": 2.8613, "step": 188100 }, { "epoch": 4.82, "learning_rate": 3.3929364347439975e-05, "loss": 2.9939, "step": 188200 }, { "epoch": 4.82, "learning_rate": 3.392082522116337e-05, "loss": 2.9626, "step": 188300 }, { "epoch": 4.83, "learning_rate": 3.391228609488678e-05, "loss": 2.8134, "step": 188400 }, { "epoch": 4.83, "learning_rate": 3.390374696861017e-05, "loss": 2.6093, "step": 188500 }, { "epoch": 4.83, "learning_rate": 3.3895207842333574e-05, "loss": 2.837, "step": 188600 }, { "epoch": 4.83, "learning_rate": 3.3886668716056976e-05, "loss": 2.8663, "step": 188700 }, { "epoch": 4.84, "learning_rate": 3.387812958978038e-05, "loss": 2.9026, "step": 188800 }, { "epoch": 4.84, "learning_rate": 3.386959046350377e-05, "loss": 2.7949, "step": 188900 }, { "epoch": 4.84, "learning_rate": 3.386105133722718e-05, "loss": 2.7179, "step": 189000 }, { "epoch": 4.84, "learning_rate": 3.3852512210950575e-05, "loss": 2.6774, "step": 189100 }, { "epoch": 4.85, "learning_rate": 3.384397308467398e-05, "loss": 2.7785, "step": 189200 }, { "epoch": 4.85, "learning_rate": 3.383543395839738e-05, "loss": 2.9502, "step": 189300 }, { "epoch": 4.85, "learning_rate": 3.382689483212078e-05, "loss": 2.7294, "step": 189400 }, { "epoch": 4.85, "learning_rate": 3.381835570584418e-05, "loss": 2.7518, "step": 189500 }, { "epoch": 4.86, "learning_rate": 3.380981657956758e-05, "loss": 2.6775, "step": 189600 }, { "epoch": 4.86, "learning_rate": 3.380127745329098e-05, "loss": 2.7685, "step": 189700 }, { "epoch": 4.86, "learning_rate": 3.3792738327014386e-05, "loss": 2.6363, "step": 189800 }, { "epoch": 4.86, "learning_rate": 3.378419920073778e-05, "loss": 2.8164, "step": 189900 }, { "epoch": 4.87, "learning_rate": 3.377566007446118e-05, "loss": 2.8068, "step": 190000 }, { "epoch": 4.87, "learning_rate": 3.3767120948184584e-05, "loss": 2.8893, "step": 190100 }, { "epoch": 4.87, "learning_rate": 3.3758581821907985e-05, "loss": 3.1823, "step": 190200 }, { "epoch": 4.87, "learning_rate": 3.375004269563139e-05, "loss": 2.8149, "step": 190300 }, { "epoch": 4.88, "learning_rate": 3.374150356935479e-05, "loss": 2.8773, "step": 190400 }, { "epoch": 4.88, "learning_rate": 3.373296444307818e-05, "loss": 2.8124, "step": 190500 }, { "epoch": 4.88, "learning_rate": 3.372442531680159e-05, "loss": 2.6555, "step": 190600 }, { "epoch": 4.89, "learning_rate": 3.3715886190524986e-05, "loss": 2.5526, "step": 190700 }, { "epoch": 4.89, "learning_rate": 3.370734706424839e-05, "loss": 2.6868, "step": 190800 }, { "epoch": 4.89, "learning_rate": 3.369880793797179e-05, "loss": 2.7514, "step": 190900 }, { "epoch": 4.89, "learning_rate": 3.369026881169519e-05, "loss": 2.8321, "step": 191000 }, { "epoch": 4.9, "learning_rate": 3.368172968541859e-05, "loss": 2.6201, "step": 191100 }, { "epoch": 4.9, "learning_rate": 3.3673190559141994e-05, "loss": 2.8815, "step": 191200 }, { "epoch": 4.9, "learning_rate": 3.366465143286539e-05, "loss": 2.8936, "step": 191300 }, { "epoch": 4.9, "learning_rate": 3.36561123065888e-05, "loss": 3.011, "step": 191400 }, { "epoch": 4.91, "learning_rate": 3.364757318031219e-05, "loss": 2.8954, "step": 191500 }, { "epoch": 4.91, "learning_rate": 3.363903405403559e-05, "loss": 2.7834, "step": 191600 }, { "epoch": 4.91, "learning_rate": 3.3630494927758995e-05, "loss": 2.6221, "step": 191700 }, { "epoch": 4.91, "learning_rate": 3.3621955801482396e-05, "loss": 2.842, "step": 191800 }, { "epoch": 4.92, "learning_rate": 3.361341667520579e-05, "loss": 2.7935, "step": 191900 }, { "epoch": 4.92, "learning_rate": 3.36048775489292e-05, "loss": 2.9066, "step": 192000 }, { "epoch": 4.92, "learning_rate": 3.3596338422652594e-05, "loss": 2.8521, "step": 192100 }, { "epoch": 4.92, "learning_rate": 3.3587799296375996e-05, "loss": 2.7344, "step": 192200 }, { "epoch": 4.93, "learning_rate": 3.35792601700994e-05, "loss": 2.7969, "step": 192300 }, { "epoch": 4.93, "learning_rate": 3.35707210438228e-05, "loss": 2.8141, "step": 192400 }, { "epoch": 4.93, "learning_rate": 3.35621819175462e-05, "loss": 2.8277, "step": 192500 }, { "epoch": 4.93, "learning_rate": 3.35536427912696e-05, "loss": 3.0706, "step": 192600 }, { "epoch": 4.94, "learning_rate": 3.3545103664993e-05, "loss": 2.6699, "step": 192700 }, { "epoch": 4.94, "learning_rate": 3.3536564538716405e-05, "loss": 2.7326, "step": 192800 }, { "epoch": 4.94, "learning_rate": 3.35280254124398e-05, "loss": 2.9995, "step": 192900 }, { "epoch": 4.94, "learning_rate": 3.35194862861632e-05, "loss": 2.6497, "step": 193000 }, { "epoch": 4.95, "learning_rate": 3.35109471598866e-05, "loss": 2.7172, "step": 193100 }, { "epoch": 4.95, "learning_rate": 3.350240803361e-05, "loss": 2.6651, "step": 193200 }, { "epoch": 4.95, "learning_rate": 3.3493868907333406e-05, "loss": 2.6778, "step": 193300 }, { "epoch": 4.95, "learning_rate": 3.34853297810568e-05, "loss": 2.9511, "step": 193400 }, { "epoch": 4.96, "learning_rate": 3.34767906547802e-05, "loss": 2.8123, "step": 193500 }, { "epoch": 4.96, "learning_rate": 3.3468251528503604e-05, "loss": 2.7655, "step": 193600 }, { "epoch": 4.96, "learning_rate": 3.3459712402227006e-05, "loss": 2.7086, "step": 193700 }, { "epoch": 4.96, "learning_rate": 3.345117327595041e-05, "loss": 3.0242, "step": 193800 }, { "epoch": 4.97, "learning_rate": 3.344263414967381e-05, "loss": 2.7939, "step": 193900 }, { "epoch": 4.97, "learning_rate": 3.3434095023397203e-05, "loss": 2.7467, "step": 194000 }, { "epoch": 4.97, "learning_rate": 3.342555589712061e-05, "loss": 2.789, "step": 194100 }, { "epoch": 4.97, "learning_rate": 3.3417016770844007e-05, "loss": 2.9062, "step": 194200 }, { "epoch": 4.98, "learning_rate": 3.340847764456741e-05, "loss": 2.9673, "step": 194300 }, { "epoch": 4.98, "learning_rate": 3.339993851829081e-05, "loss": 2.7477, "step": 194400 }, { "epoch": 4.98, "learning_rate": 3.339139939201421e-05, "loss": 2.7192, "step": 194500 }, { "epoch": 4.99, "learning_rate": 3.3382860265737606e-05, "loss": 2.88, "step": 194600 }, { "epoch": 4.99, "learning_rate": 3.3374321139461014e-05, "loss": 2.7896, "step": 194700 }, { "epoch": 4.99, "learning_rate": 3.336578201318441e-05, "loss": 2.6271, "step": 194800 }, { "epoch": 4.99, "learning_rate": 3.335724288690781e-05, "loss": 2.864, "step": 194900 }, { "epoch": 5.0, "learning_rate": 3.334870376063121e-05, "loss": 2.8348, "step": 195000 }, { "epoch": 5.0, "learning_rate": 3.3340164634354614e-05, "loss": 2.8911, "step": 195100 }, { "epoch": 5.0, "learning_rate": 3.3331625508078015e-05, "loss": 2.7296, "step": 195200 }, { "epoch": 5.0, "learning_rate": 3.332308638180142e-05, "loss": 2.6448, "step": 195300 }, { "epoch": 5.01, "learning_rate": 3.331454725552481e-05, "loss": 2.948, "step": 195400 }, { "epoch": 5.01, "learning_rate": 3.330600812924822e-05, "loss": 2.8449, "step": 195500 }, { "epoch": 5.01, "learning_rate": 3.3297469002971615e-05, "loss": 2.5992, "step": 195600 }, { "epoch": 5.01, "learning_rate": 3.3288929876695016e-05, "loss": 2.7582, "step": 195700 }, { "epoch": 5.02, "learning_rate": 3.328039075041842e-05, "loss": 2.842, "step": 195800 }, { "epoch": 5.02, "learning_rate": 3.327185162414182e-05, "loss": 2.8697, "step": 195900 }, { "epoch": 5.02, "learning_rate": 3.326331249786522e-05, "loss": 2.8471, "step": 196000 }, { "epoch": 5.02, "learning_rate": 3.325477337158862e-05, "loss": 2.8698, "step": 196100 }, { "epoch": 5.03, "learning_rate": 3.324623424531202e-05, "loss": 2.6487, "step": 196200 }, { "epoch": 5.03, "learning_rate": 3.3237695119035425e-05, "loss": 2.7105, "step": 196300 }, { "epoch": 5.03, "learning_rate": 3.322915599275882e-05, "loss": 2.8269, "step": 196400 }, { "epoch": 5.03, "learning_rate": 3.322061686648222e-05, "loss": 2.7231, "step": 196500 }, { "epoch": 5.04, "learning_rate": 3.321207774020562e-05, "loss": 2.7965, "step": 196600 }, { "epoch": 5.04, "learning_rate": 3.3203538613929025e-05, "loss": 2.7904, "step": 196700 }, { "epoch": 5.04, "learning_rate": 3.319499948765242e-05, "loss": 2.9175, "step": 196800 }, { "epoch": 5.04, "learning_rate": 3.318646036137583e-05, "loss": 2.6893, "step": 196900 }, { "epoch": 5.05, "learning_rate": 3.317792123509922e-05, "loss": 2.6112, "step": 197000 }, { "epoch": 5.05, "learning_rate": 3.3169382108822624e-05, "loss": 2.7459, "step": 197100 }, { "epoch": 5.05, "learning_rate": 3.3160842982546026e-05, "loss": 2.8013, "step": 197200 }, { "epoch": 5.05, "learning_rate": 3.315230385626943e-05, "loss": 2.6266, "step": 197300 }, { "epoch": 5.06, "learning_rate": 3.314376472999283e-05, "loss": 3.0153, "step": 197400 }, { "epoch": 5.06, "learning_rate": 3.313522560371623e-05, "loss": 2.8443, "step": 197500 }, { "epoch": 5.06, "learning_rate": 3.3126686477439625e-05, "loss": 2.9788, "step": 197600 }, { "epoch": 5.06, "learning_rate": 3.3118147351163034e-05, "loss": 2.8516, "step": 197700 }, { "epoch": 5.07, "learning_rate": 3.310960822488643e-05, "loss": 2.7007, "step": 197800 }, { "epoch": 5.07, "learning_rate": 3.310106909860983e-05, "loss": 2.8234, "step": 197900 }, { "epoch": 5.07, "learning_rate": 3.309252997233323e-05, "loss": 2.8521, "step": 198000 }, { "epoch": 5.07, "learning_rate": 3.308399084605663e-05, "loss": 2.7973, "step": 198100 }, { "epoch": 5.08, "learning_rate": 3.3075451719780035e-05, "loss": 2.8427, "step": 198200 }, { "epoch": 5.08, "learning_rate": 3.3066912593503436e-05, "loss": 2.7293, "step": 198300 }, { "epoch": 5.08, "learning_rate": 3.305837346722683e-05, "loss": 2.7858, "step": 198400 }, { "epoch": 5.09, "learning_rate": 3.304983434095024e-05, "loss": 2.7012, "step": 198500 }, { "epoch": 5.09, "learning_rate": 3.3041295214673634e-05, "loss": 2.5829, "step": 198600 }, { "epoch": 5.09, "learning_rate": 3.3032756088397036e-05, "loss": 2.7131, "step": 198700 }, { "epoch": 5.09, "learning_rate": 3.302421696212044e-05, "loss": 2.8127, "step": 198800 }, { "epoch": 5.1, "learning_rate": 3.301567783584384e-05, "loss": 2.8022, "step": 198900 }, { "epoch": 5.1, "learning_rate": 3.300713870956724e-05, "loss": 2.5661, "step": 199000 }, { "epoch": 5.1, "learning_rate": 3.299859958329064e-05, "loss": 2.697, "step": 199100 }, { "epoch": 5.1, "learning_rate": 3.2990060457014037e-05, "loss": 2.8766, "step": 199200 }, { "epoch": 5.11, "learning_rate": 3.2981521330737445e-05, "loss": 2.7826, "step": 199300 }, { "epoch": 5.11, "learning_rate": 3.297298220446084e-05, "loss": 2.7409, "step": 199400 }, { "epoch": 5.11, "learning_rate": 3.296444307818424e-05, "loss": 2.773, "step": 199500 }, { "epoch": 5.11, "learning_rate": 3.295590395190764e-05, "loss": 2.9256, "step": 199600 }, { "epoch": 5.12, "learning_rate": 3.2947364825631044e-05, "loss": 2.817, "step": 199700 }, { "epoch": 5.12, "learning_rate": 3.293882569935444e-05, "loss": 2.7419, "step": 199800 }, { "epoch": 5.12, "learning_rate": 3.293028657307785e-05, "loss": 2.7595, "step": 199900 }, { "epoch": 5.12, "learning_rate": 3.292174744680124e-05, "loss": 2.7349, "step": 200000 }, { "epoch": 5.13, "learning_rate": 3.2913208320524644e-05, "loss": 2.6855, "step": 200100 }, { "epoch": 5.13, "learning_rate": 3.2904669194248045e-05, "loss": 2.8252, "step": 200200 }, { "epoch": 5.13, "learning_rate": 3.289613006797145e-05, "loss": 2.8531, "step": 200300 }, { "epoch": 5.13, "learning_rate": 3.288759094169485e-05, "loss": 2.7198, "step": 200400 }, { "epoch": 5.14, "learning_rate": 3.287905181541825e-05, "loss": 2.8435, "step": 200500 }, { "epoch": 5.14, "learning_rate": 3.2870512689141645e-05, "loss": 2.7236, "step": 200600 }, { "epoch": 5.14, "learning_rate": 3.286197356286505e-05, "loss": 2.7964, "step": 200700 }, { "epoch": 5.14, "learning_rate": 3.285343443658845e-05, "loss": 2.8058, "step": 200800 }, { "epoch": 5.15, "learning_rate": 3.284489531031185e-05, "loss": 2.6538, "step": 200900 }, { "epoch": 5.15, "learning_rate": 3.283635618403525e-05, "loss": 2.8613, "step": 201000 }, { "epoch": 5.15, "learning_rate": 3.282781705775865e-05, "loss": 2.8554, "step": 201100 }, { "epoch": 5.15, "learning_rate": 3.2819277931482054e-05, "loss": 2.6762, "step": 201200 }, { "epoch": 5.16, "learning_rate": 3.2810738805205455e-05, "loss": 2.772, "step": 201300 }, { "epoch": 5.16, "learning_rate": 3.280219967892885e-05, "loss": 2.5641, "step": 201400 }, { "epoch": 5.16, "learning_rate": 3.279366055265226e-05, "loss": 2.8266, "step": 201500 }, { "epoch": 5.16, "learning_rate": 3.278512142637565e-05, "loss": 3.0291, "step": 201600 }, { "epoch": 5.17, "learning_rate": 3.2776582300099055e-05, "loss": 2.648, "step": 201700 }, { "epoch": 5.17, "learning_rate": 3.2768043173822456e-05, "loss": 2.8076, "step": 201800 }, { "epoch": 5.17, "learning_rate": 3.275950404754586e-05, "loss": 2.7914, "step": 201900 }, { "epoch": 5.17, "learning_rate": 3.275096492126925e-05, "loss": 2.8401, "step": 202000 }, { "epoch": 5.18, "learning_rate": 3.274242579499266e-05, "loss": 2.6394, "step": 202100 }, { "epoch": 5.18, "learning_rate": 3.2733886668716056e-05, "loss": 2.8558, "step": 202200 }, { "epoch": 5.18, "learning_rate": 3.272534754243946e-05, "loss": 2.7711, "step": 202300 }, { "epoch": 5.18, "learning_rate": 3.271680841616286e-05, "loss": 2.9304, "step": 202400 }, { "epoch": 5.19, "learning_rate": 3.270826928988626e-05, "loss": 2.6178, "step": 202500 }, { "epoch": 5.19, "learning_rate": 3.269973016360966e-05, "loss": 2.8751, "step": 202600 }, { "epoch": 5.19, "learning_rate": 3.2691191037333064e-05, "loss": 2.7741, "step": 202700 }, { "epoch": 5.2, "learning_rate": 3.268265191105646e-05, "loss": 2.7994, "step": 202800 }, { "epoch": 5.2, "learning_rate": 3.267411278477987e-05, "loss": 2.7062, "step": 202900 }, { "epoch": 5.2, "learning_rate": 3.266557365850326e-05, "loss": 2.804, "step": 203000 }, { "epoch": 5.2, "learning_rate": 3.265703453222666e-05, "loss": 2.6745, "step": 203100 }, { "epoch": 5.21, "learning_rate": 3.2648495405950065e-05, "loss": 2.8282, "step": 203200 }, { "epoch": 5.21, "learning_rate": 3.2639956279673466e-05, "loss": 2.6831, "step": 203300 }, { "epoch": 5.21, "learning_rate": 3.263141715339687e-05, "loss": 2.7802, "step": 203400 }, { "epoch": 5.21, "learning_rate": 3.262287802712027e-05, "loss": 2.7884, "step": 203500 }, { "epoch": 5.22, "learning_rate": 3.2614338900843664e-05, "loss": 2.6893, "step": 203600 }, { "epoch": 5.22, "learning_rate": 3.260579977456707e-05, "loss": 2.8855, "step": 203700 }, { "epoch": 5.22, "learning_rate": 3.259726064829047e-05, "loss": 2.6815, "step": 203800 }, { "epoch": 5.22, "learning_rate": 3.258872152201387e-05, "loss": 2.7475, "step": 203900 }, { "epoch": 5.23, "learning_rate": 3.258018239573727e-05, "loss": 2.7349, "step": 204000 }, { "epoch": 5.23, "learning_rate": 3.257164326946067e-05, "loss": 2.6756, "step": 204100 }, { "epoch": 5.23, "learning_rate": 3.256310414318407e-05, "loss": 2.6423, "step": 204200 }, { "epoch": 5.23, "learning_rate": 3.2554565016907475e-05, "loss": 2.9113, "step": 204300 }, { "epoch": 5.24, "learning_rate": 3.254602589063087e-05, "loss": 2.797, "step": 204400 }, { "epoch": 5.24, "learning_rate": 3.253748676435428e-05, "loss": 2.7763, "step": 204500 }, { "epoch": 5.24, "learning_rate": 3.252894763807767e-05, "loss": 2.8551, "step": 204600 }, { "epoch": 5.24, "learning_rate": 3.2520408511801074e-05, "loss": 2.7685, "step": 204700 }, { "epoch": 5.25, "learning_rate": 3.2511869385524476e-05, "loss": 2.6483, "step": 204800 }, { "epoch": 5.25, "learning_rate": 3.250333025924788e-05, "loss": 2.7825, "step": 204900 }, { "epoch": 5.25, "learning_rate": 3.249479113297127e-05, "loss": 2.5934, "step": 205000 }, { "epoch": 5.25, "learning_rate": 3.248625200669468e-05, "loss": 2.7495, "step": 205100 }, { "epoch": 5.26, "learning_rate": 3.2477712880418075e-05, "loss": 2.629, "step": 205200 }, { "epoch": 5.26, "learning_rate": 3.246917375414148e-05, "loss": 2.8097, "step": 205300 }, { "epoch": 5.26, "learning_rate": 3.246063462786488e-05, "loss": 2.8791, "step": 205400 }, { "epoch": 5.26, "learning_rate": 3.245209550158828e-05, "loss": 2.8065, "step": 205500 }, { "epoch": 5.27, "learning_rate": 3.244355637531168e-05, "loss": 2.5965, "step": 205600 }, { "epoch": 5.27, "learning_rate": 3.243501724903508e-05, "loss": 2.8874, "step": 205700 }, { "epoch": 5.27, "learning_rate": 3.242647812275848e-05, "loss": 2.5646, "step": 205800 }, { "epoch": 5.27, "learning_rate": 3.2417938996481886e-05, "loss": 2.8783, "step": 205900 }, { "epoch": 5.28, "learning_rate": 3.240939987020528e-05, "loss": 2.8532, "step": 206000 }, { "epoch": 5.28, "learning_rate": 3.240086074392868e-05, "loss": 2.7758, "step": 206100 }, { "epoch": 5.28, "learning_rate": 3.2392321617652084e-05, "loss": 2.9672, "step": 206200 }, { "epoch": 5.28, "learning_rate": 3.2383782491375485e-05, "loss": 2.6679, "step": 206300 }, { "epoch": 5.29, "learning_rate": 3.237524336509889e-05, "loss": 2.68, "step": 206400 }, { "epoch": 5.29, "learning_rate": 3.236670423882229e-05, "loss": 2.9546, "step": 206500 }, { "epoch": 5.29, "learning_rate": 3.235816511254568e-05, "loss": 2.665, "step": 206600 }, { "epoch": 5.3, "learning_rate": 3.234962598626909e-05, "loss": 2.6909, "step": 206700 }, { "epoch": 5.3, "learning_rate": 3.2341086859992486e-05, "loss": 2.733, "step": 206800 }, { "epoch": 5.3, "learning_rate": 3.233254773371589e-05, "loss": 2.7725, "step": 206900 }, { "epoch": 5.3, "learning_rate": 3.232400860743929e-05, "loss": 2.975, "step": 207000 }, { "epoch": 5.31, "learning_rate": 3.231546948116269e-05, "loss": 2.8213, "step": 207100 }, { "epoch": 5.31, "learning_rate": 3.2306930354886086e-05, "loss": 2.6112, "step": 207200 }, { "epoch": 5.31, "learning_rate": 3.2298391228609494e-05, "loss": 2.8263, "step": 207300 }, { "epoch": 5.31, "learning_rate": 3.228985210233289e-05, "loss": 2.7241, "step": 207400 }, { "epoch": 5.32, "learning_rate": 3.228131297605629e-05, "loss": 2.7141, "step": 207500 }, { "epoch": 5.32, "learning_rate": 3.227277384977969e-05, "loss": 2.7914, "step": 207600 }, { "epoch": 5.32, "learning_rate": 3.2264234723503094e-05, "loss": 2.7239, "step": 207700 }, { "epoch": 5.32, "learning_rate": 3.2255695597226495e-05, "loss": 2.8226, "step": 207800 }, { "epoch": 5.33, "learning_rate": 3.22471564709499e-05, "loss": 2.7109, "step": 207900 }, { "epoch": 5.33, "learning_rate": 3.223861734467329e-05, "loss": 2.764, "step": 208000 }, { "epoch": 5.33, "learning_rate": 3.22300782183967e-05, "loss": 2.7809, "step": 208100 }, { "epoch": 5.33, "learning_rate": 3.2221539092120095e-05, "loss": 2.7088, "step": 208200 }, { "epoch": 5.34, "learning_rate": 3.2212999965843496e-05, "loss": 2.6485, "step": 208300 }, { "epoch": 5.34, "learning_rate": 3.22044608395669e-05, "loss": 2.7716, "step": 208400 }, { "epoch": 5.34, "learning_rate": 3.21959217132903e-05, "loss": 2.7955, "step": 208500 }, { "epoch": 5.34, "learning_rate": 3.21873825870137e-05, "loss": 2.7065, "step": 208600 }, { "epoch": 5.35, "learning_rate": 3.21788434607371e-05, "loss": 2.7836, "step": 208700 }, { "epoch": 5.35, "learning_rate": 3.21703043344605e-05, "loss": 2.7758, "step": 208800 }, { "epoch": 5.35, "learning_rate": 3.2161765208183905e-05, "loss": 2.7706, "step": 208900 }, { "epoch": 5.35, "learning_rate": 3.21532260819073e-05, "loss": 2.6964, "step": 209000 }, { "epoch": 5.36, "learning_rate": 3.21446869556307e-05, "loss": 2.9809, "step": 209100 }, { "epoch": 5.36, "learning_rate": 3.21361478293541e-05, "loss": 2.6947, "step": 209200 }, { "epoch": 5.36, "learning_rate": 3.2127608703077505e-05, "loss": 2.7077, "step": 209300 }, { "epoch": 5.36, "learning_rate": 3.2119069576800906e-05, "loss": 2.8423, "step": 209400 }, { "epoch": 5.37, "learning_rate": 3.211053045052431e-05, "loss": 2.7861, "step": 209500 }, { "epoch": 5.37, "learning_rate": 3.21019913242477e-05, "loss": 2.81, "step": 209600 }, { "epoch": 5.37, "learning_rate": 3.209345219797111e-05, "loss": 2.8115, "step": 209700 }, { "epoch": 5.37, "learning_rate": 3.2084913071694506e-05, "loss": 2.6927, "step": 209800 }, { "epoch": 5.38, "learning_rate": 3.207637394541791e-05, "loss": 2.8274, "step": 209900 }, { "epoch": 5.38, "learning_rate": 3.206783481914131e-05, "loss": 2.8401, "step": 210000 }, { "epoch": 5.38, "learning_rate": 3.205929569286471e-05, "loss": 2.9757, "step": 210100 }, { "epoch": 5.38, "learning_rate": 3.2050756566588105e-05, "loss": 2.5407, "step": 210200 }, { "epoch": 5.39, "learning_rate": 3.2042217440311513e-05, "loss": 2.6253, "step": 210300 }, { "epoch": 5.39, "learning_rate": 3.203367831403491e-05, "loss": 2.6943, "step": 210400 }, { "epoch": 5.39, "learning_rate": 3.202513918775831e-05, "loss": 2.8777, "step": 210500 }, { "epoch": 5.4, "learning_rate": 3.201660006148171e-05, "loss": 2.6651, "step": 210600 }, { "epoch": 5.4, "learning_rate": 3.2008060935205106e-05, "loss": 2.6112, "step": 210700 }, { "epoch": 5.4, "learning_rate": 3.1999521808928514e-05, "loss": 2.7957, "step": 210800 }, { "epoch": 5.4, "learning_rate": 3.199098268265191e-05, "loss": 2.8197, "step": 210900 }, { "epoch": 5.41, "learning_rate": 3.198244355637531e-05, "loss": 2.8579, "step": 211000 }, { "epoch": 5.41, "learning_rate": 3.197390443009871e-05, "loss": 2.8535, "step": 211100 }, { "epoch": 5.41, "learning_rate": 3.1965365303822114e-05, "loss": 2.813, "step": 211200 }, { "epoch": 5.41, "learning_rate": 3.1956826177545515e-05, "loss": 2.8407, "step": 211300 }, { "epoch": 5.42, "learning_rate": 3.194828705126892e-05, "loss": 2.6361, "step": 211400 }, { "epoch": 5.42, "learning_rate": 3.193974792499231e-05, "loss": 2.9215, "step": 211500 }, { "epoch": 5.42, "learning_rate": 3.193120879871572e-05, "loss": 2.8109, "step": 211600 }, { "epoch": 5.42, "learning_rate": 3.1922669672439115e-05, "loss": 2.7735, "step": 211700 }, { "epoch": 5.43, "learning_rate": 3.1914130546162516e-05, "loss": 2.5616, "step": 211800 }, { "epoch": 5.43, "learning_rate": 3.190559141988592e-05, "loss": 2.8323, "step": 211900 }, { "epoch": 5.43, "learning_rate": 3.189705229360932e-05, "loss": 2.6473, "step": 212000 }, { "epoch": 5.43, "learning_rate": 3.1888513167332714e-05, "loss": 2.6523, "step": 212100 }, { "epoch": 5.44, "learning_rate": 3.187997404105612e-05, "loss": 2.6755, "step": 212200 }, { "epoch": 5.44, "learning_rate": 3.187143491477952e-05, "loss": 2.6132, "step": 212300 }, { "epoch": 5.44, "learning_rate": 3.186289578850292e-05, "loss": 2.573, "step": 212400 }, { "epoch": 5.44, "learning_rate": 3.185435666222632e-05, "loss": 2.8654, "step": 212500 }, { "epoch": 5.45, "learning_rate": 3.184581753594972e-05, "loss": 2.7064, "step": 212600 }, { "epoch": 5.45, "learning_rate": 3.1837278409673124e-05, "loss": 2.9272, "step": 212700 }, { "epoch": 5.45, "learning_rate": 3.1828739283396525e-05, "loss": 2.8052, "step": 212800 }, { "epoch": 5.45, "learning_rate": 3.182020015711992e-05, "loss": 2.6925, "step": 212900 }, { "epoch": 5.46, "learning_rate": 3.181166103084333e-05, "loss": 2.7437, "step": 213000 }, { "epoch": 5.46, "learning_rate": 3.180312190456672e-05, "loss": 2.7572, "step": 213100 }, { "epoch": 5.46, "learning_rate": 3.1794582778290125e-05, "loss": 2.7736, "step": 213200 }, { "epoch": 5.46, "learning_rate": 3.1786043652013526e-05, "loss": 2.6393, "step": 213300 }, { "epoch": 5.47, "learning_rate": 3.177750452573693e-05, "loss": 2.8257, "step": 213400 }, { "epoch": 5.47, "learning_rate": 3.176896539946033e-05, "loss": 2.6962, "step": 213500 }, { "epoch": 5.47, "learning_rate": 3.176042627318373e-05, "loss": 2.7065, "step": 213600 }, { "epoch": 5.47, "learning_rate": 3.1751887146907125e-05, "loss": 2.7483, "step": 213700 }, { "epoch": 5.48, "learning_rate": 3.1743348020630534e-05, "loss": 2.7294, "step": 213800 }, { "epoch": 5.48, "learning_rate": 3.173480889435393e-05, "loss": 2.8052, "step": 213900 }, { "epoch": 5.48, "learning_rate": 3.172626976807733e-05, "loss": 2.643, "step": 214000 }, { "epoch": 5.48, "learning_rate": 3.171773064180073e-05, "loss": 2.8264, "step": 214100 }, { "epoch": 5.49, "learning_rate": 3.170919151552413e-05, "loss": 2.8191, "step": 214200 }, { "epoch": 5.49, "learning_rate": 3.1700652389247535e-05, "loss": 2.7863, "step": 214300 }, { "epoch": 5.49, "learning_rate": 3.1692113262970936e-05, "loss": 2.6596, "step": 214400 }, { "epoch": 5.49, "learning_rate": 3.168357413669433e-05, "loss": 2.6472, "step": 214500 }, { "epoch": 5.5, "learning_rate": 3.167503501041774e-05, "loss": 2.6921, "step": 214600 }, { "epoch": 5.5, "learning_rate": 3.1666495884141134e-05, "loss": 2.5574, "step": 214700 }, { "epoch": 5.5, "learning_rate": 3.1657956757864536e-05, "loss": 2.664, "step": 214800 }, { "epoch": 5.51, "learning_rate": 3.164941763158794e-05, "loss": 2.6997, "step": 214900 }, { "epoch": 5.51, "learning_rate": 3.164087850531134e-05, "loss": 2.64, "step": 215000 }, { "epoch": 5.51, "learning_rate": 3.1632339379034734e-05, "loss": 2.5975, "step": 215100 }, { "epoch": 5.51, "learning_rate": 3.162380025275814e-05, "loss": 2.875, "step": 215200 }, { "epoch": 5.52, "learning_rate": 3.161526112648154e-05, "loss": 2.7729, "step": 215300 }, { "epoch": 5.52, "learning_rate": 3.160672200020494e-05, "loss": 2.5971, "step": 215400 }, { "epoch": 5.52, "learning_rate": 3.159818287392834e-05, "loss": 2.9298, "step": 215500 }, { "epoch": 5.52, "learning_rate": 3.158964374765174e-05, "loss": 2.5974, "step": 215600 }, { "epoch": 5.53, "learning_rate": 3.158110462137514e-05, "loss": 2.7801, "step": 215700 }, { "epoch": 5.53, "learning_rate": 3.1572565495098544e-05, "loss": 2.6669, "step": 215800 }, { "epoch": 5.53, "learning_rate": 3.156402636882194e-05, "loss": 2.7291, "step": 215900 }, { "epoch": 5.53, "learning_rate": 3.155548724254535e-05, "loss": 2.9889, "step": 216000 }, { "epoch": 5.54, "learning_rate": 3.154694811626874e-05, "loss": 2.783, "step": 216100 }, { "epoch": 5.54, "learning_rate": 3.1538408989992144e-05, "loss": 2.7246, "step": 216200 }, { "epoch": 5.54, "learning_rate": 3.1529869863715545e-05, "loss": 2.6294, "step": 216300 }, { "epoch": 5.54, "learning_rate": 3.152133073743895e-05, "loss": 2.8448, "step": 216400 }, { "epoch": 5.55, "learning_rate": 3.151279161116235e-05, "loss": 2.4837, "step": 216500 }, { "epoch": 5.55, "learning_rate": 3.150425248488575e-05, "loss": 2.8523, "step": 216600 }, { "epoch": 5.55, "learning_rate": 3.1495713358609145e-05, "loss": 2.4857, "step": 216700 }, { "epoch": 5.55, "learning_rate": 3.148717423233255e-05, "loss": 2.7957, "step": 216800 }, { "epoch": 5.56, "learning_rate": 3.147863510605595e-05, "loss": 2.5642, "step": 216900 }, { "epoch": 5.56, "learning_rate": 3.147009597977935e-05, "loss": 2.8443, "step": 217000 }, { "epoch": 5.56, "learning_rate": 3.146155685350275e-05, "loss": 2.8137, "step": 217100 }, { "epoch": 5.56, "learning_rate": 3.145301772722615e-05, "loss": 2.6857, "step": 217200 }, { "epoch": 5.57, "learning_rate": 3.144447860094955e-05, "loss": 2.519, "step": 217300 }, { "epoch": 5.57, "learning_rate": 3.1435939474672956e-05, "loss": 2.5561, "step": 217400 }, { "epoch": 5.57, "learning_rate": 3.142740034839635e-05, "loss": 2.6158, "step": 217500 }, { "epoch": 5.57, "learning_rate": 3.141886122211975e-05, "loss": 2.7535, "step": 217600 }, { "epoch": 5.58, "learning_rate": 3.1410322095843154e-05, "loss": 2.5762, "step": 217700 }, { "epoch": 5.58, "learning_rate": 3.1401782969566555e-05, "loss": 2.7406, "step": 217800 }, { "epoch": 5.58, "learning_rate": 3.139324384328996e-05, "loss": 2.6391, "step": 217900 }, { "epoch": 5.58, "learning_rate": 3.138470471701336e-05, "loss": 2.6371, "step": 218000 }, { "epoch": 5.59, "learning_rate": 3.137616559073675e-05, "loss": 2.5723, "step": 218100 }, { "epoch": 5.59, "learning_rate": 3.136762646446016e-05, "loss": 2.6901, "step": 218200 }, { "epoch": 5.59, "learning_rate": 3.1359087338183556e-05, "loss": 2.4828, "step": 218300 }, { "epoch": 5.59, "learning_rate": 3.135054821190696e-05, "loss": 2.7638, "step": 218400 }, { "epoch": 5.6, "learning_rate": 3.134200908563036e-05, "loss": 2.7363, "step": 218500 }, { "epoch": 5.6, "learning_rate": 3.133346995935376e-05, "loss": 2.6146, "step": 218600 }, { "epoch": 5.6, "learning_rate": 3.132493083307716e-05, "loss": 2.7168, "step": 218700 }, { "epoch": 5.61, "learning_rate": 3.1316391706800564e-05, "loss": 2.7791, "step": 218800 }, { "epoch": 5.61, "learning_rate": 3.130785258052396e-05, "loss": 2.8559, "step": 218900 }, { "epoch": 5.61, "learning_rate": 3.129931345424737e-05, "loss": 2.6361, "step": 219000 }, { "epoch": 5.61, "learning_rate": 3.129077432797076e-05, "loss": 2.7913, "step": 219100 }, { "epoch": 5.62, "learning_rate": 3.128223520169416e-05, "loss": 2.7234, "step": 219200 }, { "epoch": 5.62, "learning_rate": 3.1273696075417565e-05, "loss": 2.9367, "step": 219300 }, { "epoch": 5.62, "learning_rate": 3.1265156949140966e-05, "loss": 2.8618, "step": 219400 }, { "epoch": 5.62, "learning_rate": 3.125661782286437e-05, "loss": 2.7544, "step": 219500 }, { "epoch": 5.63, "learning_rate": 3.124807869658777e-05, "loss": 2.6748, "step": 219600 }, { "epoch": 5.63, "learning_rate": 3.1239539570311164e-05, "loss": 2.7111, "step": 219700 }, { "epoch": 5.63, "learning_rate": 3.123100044403457e-05, "loss": 2.7402, "step": 219800 }, { "epoch": 5.63, "learning_rate": 3.122246131775797e-05, "loss": 2.6942, "step": 219900 }, { "epoch": 5.64, "learning_rate": 3.121392219148137e-05, "loss": 2.7211, "step": 220000 }, { "epoch": 5.64, "learning_rate": 3.120538306520477e-05, "loss": 2.649, "step": 220100 }, { "epoch": 5.64, "learning_rate": 3.119684393892817e-05, "loss": 2.8113, "step": 220200 }, { "epoch": 5.64, "learning_rate": 3.118830481265157e-05, "loss": 2.6578, "step": 220300 }, { "epoch": 5.65, "learning_rate": 3.1179765686374975e-05, "loss": 2.8545, "step": 220400 }, { "epoch": 5.65, "learning_rate": 3.117122656009837e-05, "loss": 2.9633, "step": 220500 }, { "epoch": 5.65, "learning_rate": 3.116268743382177e-05, "loss": 2.8168, "step": 220600 }, { "epoch": 5.65, "learning_rate": 3.115414830754517e-05, "loss": 2.6067, "step": 220700 }, { "epoch": 5.66, "learning_rate": 3.1145609181268574e-05, "loss": 2.6563, "step": 220800 }, { "epoch": 5.66, "learning_rate": 3.1137070054991976e-05, "loss": 2.6782, "step": 220900 }, { "epoch": 5.66, "learning_rate": 3.112853092871538e-05, "loss": 2.5729, "step": 221000 }, { "epoch": 5.66, "learning_rate": 3.111999180243877e-05, "loss": 2.6424, "step": 221100 }, { "epoch": 5.67, "learning_rate": 3.111145267616218e-05, "loss": 2.6948, "step": 221200 }, { "epoch": 5.67, "learning_rate": 3.1102913549885575e-05, "loss": 2.6781, "step": 221300 }, { "epoch": 5.67, "learning_rate": 3.109437442360898e-05, "loss": 2.6164, "step": 221400 }, { "epoch": 5.67, "learning_rate": 3.108583529733238e-05, "loss": 2.7599, "step": 221500 }, { "epoch": 5.68, "learning_rate": 3.107729617105578e-05, "loss": 2.6885, "step": 221600 }, { "epoch": 5.68, "learning_rate": 3.106875704477918e-05, "loss": 2.7714, "step": 221700 }, { "epoch": 5.68, "learning_rate": 3.106021791850258e-05, "loss": 2.8554, "step": 221800 }, { "epoch": 5.68, "learning_rate": 3.105167879222598e-05, "loss": 2.8403, "step": 221900 }, { "epoch": 5.69, "learning_rate": 3.1043139665949386e-05, "loss": 2.7891, "step": 222000 }, { "epoch": 5.69, "learning_rate": 3.103460053967278e-05, "loss": 2.7963, "step": 222100 }, { "epoch": 5.69, "learning_rate": 3.102606141339618e-05, "loss": 2.8312, "step": 222200 }, { "epoch": 5.69, "learning_rate": 3.1017522287119584e-05, "loss": 2.5519, "step": 222300 }, { "epoch": 5.7, "learning_rate": 3.1008983160842986e-05, "loss": 2.7113, "step": 222400 }, { "epoch": 5.7, "learning_rate": 3.100044403456639e-05, "loss": 2.6592, "step": 222500 }, { "epoch": 5.7, "learning_rate": 3.099190490828979e-05, "loss": 2.7181, "step": 222600 }, { "epoch": 5.7, "learning_rate": 3.0983365782013184e-05, "loss": 2.6204, "step": 222700 }, { "epoch": 5.71, "learning_rate": 3.097482665573659e-05, "loss": 2.6717, "step": 222800 }, { "epoch": 5.71, "learning_rate": 3.0966287529459987e-05, "loss": 2.8013, "step": 222900 }, { "epoch": 5.71, "learning_rate": 3.095774840318339e-05, "loss": 2.7697, "step": 223000 }, { "epoch": 5.72, "learning_rate": 3.094920927690679e-05, "loss": 2.6964, "step": 223100 }, { "epoch": 5.72, "learning_rate": 3.094067015063019e-05, "loss": 2.7323, "step": 223200 }, { "epoch": 5.72, "learning_rate": 3.0932131024353586e-05, "loss": 2.6465, "step": 223300 }, { "epoch": 5.72, "learning_rate": 3.0923591898076994e-05, "loss": 2.6533, "step": 223400 }, { "epoch": 5.73, "learning_rate": 3.091505277180039e-05, "loss": 2.5259, "step": 223500 }, { "epoch": 5.73, "learning_rate": 3.090651364552379e-05, "loss": 2.943, "step": 223600 }, { "epoch": 5.73, "learning_rate": 3.089797451924719e-05, "loss": 2.6762, "step": 223700 }, { "epoch": 5.73, "learning_rate": 3.0889435392970594e-05, "loss": 2.8889, "step": 223800 }, { "epoch": 5.74, "learning_rate": 3.0880896266693995e-05, "loss": 2.907, "step": 223900 }, { "epoch": 5.74, "learning_rate": 3.08723571404174e-05, "loss": 2.769, "step": 224000 }, { "epoch": 5.74, "learning_rate": 3.086381801414079e-05, "loss": 2.8173, "step": 224100 }, { "epoch": 5.74, "learning_rate": 3.08552788878642e-05, "loss": 2.6643, "step": 224200 }, { "epoch": 5.75, "learning_rate": 3.0846739761587595e-05, "loss": 2.855, "step": 224300 }, { "epoch": 5.75, "learning_rate": 3.0838200635310996e-05, "loss": 2.836, "step": 224400 }, { "epoch": 5.75, "learning_rate": 3.08296615090344e-05, "loss": 2.703, "step": 224500 }, { "epoch": 5.75, "learning_rate": 3.08211223827578e-05, "loss": 2.8927, "step": 224600 }, { "epoch": 5.76, "learning_rate": 3.08125832564812e-05, "loss": 2.6234, "step": 224700 }, { "epoch": 5.76, "learning_rate": 3.08040441302046e-05, "loss": 2.715, "step": 224800 }, { "epoch": 5.76, "learning_rate": 3.0795505003928e-05, "loss": 2.6856, "step": 224900 }, { "epoch": 5.76, "learning_rate": 3.0786965877651406e-05, "loss": 2.6405, "step": 225000 }, { "epoch": 5.77, "learning_rate": 3.07784267513748e-05, "loss": 2.6382, "step": 225100 }, { "epoch": 5.77, "learning_rate": 3.07698876250982e-05, "loss": 2.6424, "step": 225200 }, { "epoch": 5.77, "learning_rate": 3.0761348498821603e-05, "loss": 2.6423, "step": 225300 }, { "epoch": 5.77, "learning_rate": 3.0752809372545005e-05, "loss": 2.7959, "step": 225400 }, { "epoch": 5.78, "learning_rate": 3.07442702462684e-05, "loss": 2.9217, "step": 225500 }, { "epoch": 5.78, "learning_rate": 3.073573111999181e-05, "loss": 2.8175, "step": 225600 }, { "epoch": 5.78, "learning_rate": 3.07271919937152e-05, "loss": 2.6331, "step": 225700 }, { "epoch": 5.78, "learning_rate": 3.0718652867438604e-05, "loss": 2.7062, "step": 225800 }, { "epoch": 5.79, "learning_rate": 3.0710113741162006e-05, "loss": 2.6827, "step": 225900 }, { "epoch": 5.79, "learning_rate": 3.070157461488541e-05, "loss": 2.6571, "step": 226000 }, { "epoch": 5.79, "learning_rate": 3.069303548860881e-05, "loss": 2.5765, "step": 226100 }, { "epoch": 5.79, "learning_rate": 3.068449636233221e-05, "loss": 2.7972, "step": 226200 }, { "epoch": 5.8, "learning_rate": 3.0675957236055605e-05, "loss": 2.7939, "step": 226300 }, { "epoch": 5.8, "learning_rate": 3.0667418109779014e-05, "loss": 2.6674, "step": 226400 }, { "epoch": 5.8, "learning_rate": 3.065887898350241e-05, "loss": 2.6047, "step": 226500 }, { "epoch": 5.8, "learning_rate": 3.065033985722581e-05, "loss": 2.5639, "step": 226600 }, { "epoch": 5.81, "learning_rate": 3.064180073094921e-05, "loss": 2.7367, "step": 226700 }, { "epoch": 5.81, "learning_rate": 3.063326160467261e-05, "loss": 2.879, "step": 226800 }, { "epoch": 5.81, "learning_rate": 3.0624722478396015e-05, "loss": 2.8679, "step": 226900 }, { "epoch": 5.82, "learning_rate": 3.0616183352119416e-05, "loss": 2.6516, "step": 227000 }, { "epoch": 5.82, "learning_rate": 3.060764422584281e-05, "loss": 2.727, "step": 227100 }, { "epoch": 5.82, "learning_rate": 3.059910509956622e-05, "loss": 2.7692, "step": 227200 }, { "epoch": 5.82, "learning_rate": 3.0590565973289614e-05, "loss": 2.6338, "step": 227300 }, { "epoch": 5.83, "learning_rate": 3.0582026847013016e-05, "loss": 2.6727, "step": 227400 }, { "epoch": 5.83, "learning_rate": 3.057348772073642e-05, "loss": 2.5125, "step": 227500 }, { "epoch": 5.83, "learning_rate": 3.056494859445982e-05, "loss": 2.8578, "step": 227600 }, { "epoch": 5.83, "learning_rate": 3.055640946818322e-05, "loss": 2.792, "step": 227700 }, { "epoch": 5.84, "learning_rate": 3.054787034190662e-05, "loss": 2.6018, "step": 227800 }, { "epoch": 5.84, "learning_rate": 3.0539331215630017e-05, "loss": 2.7506, "step": 227900 }, { "epoch": 5.84, "learning_rate": 3.053079208935342e-05, "loss": 2.937, "step": 228000 }, { "epoch": 5.84, "learning_rate": 3.052225296307682e-05, "loss": 2.4773, "step": 228100 }, { "epoch": 5.85, "learning_rate": 3.0513713836800218e-05, "loss": 2.745, "step": 228200 }, { "epoch": 5.85, "learning_rate": 3.050517471052362e-05, "loss": 2.6591, "step": 228300 }, { "epoch": 5.85, "learning_rate": 3.0496635584247018e-05, "loss": 2.8307, "step": 228400 }, { "epoch": 5.85, "learning_rate": 3.0488096457970422e-05, "loss": 2.732, "step": 228500 }, { "epoch": 5.86, "learning_rate": 3.047955733169382e-05, "loss": 2.703, "step": 228600 }, { "epoch": 5.86, "learning_rate": 3.0471018205417222e-05, "loss": 2.7368, "step": 228700 }, { "epoch": 5.86, "learning_rate": 3.046247907914062e-05, "loss": 2.7134, "step": 228800 }, { "epoch": 5.86, "learning_rate": 3.0453939952864025e-05, "loss": 2.717, "step": 228900 }, { "epoch": 5.87, "learning_rate": 3.0445400826587423e-05, "loss": 2.6813, "step": 229000 }, { "epoch": 5.87, "learning_rate": 3.0436861700310825e-05, "loss": 2.7051, "step": 229100 }, { "epoch": 5.87, "learning_rate": 3.0428322574034223e-05, "loss": 2.4863, "step": 229200 }, { "epoch": 5.87, "learning_rate": 3.0419783447757628e-05, "loss": 2.8049, "step": 229300 }, { "epoch": 5.88, "learning_rate": 3.0411244321481026e-05, "loss": 2.7755, "step": 229400 }, { "epoch": 5.88, "learning_rate": 3.0402705195204428e-05, "loss": 2.9066, "step": 229500 }, { "epoch": 5.88, "learning_rate": 3.0394166068927826e-05, "loss": 3.0177, "step": 229600 }, { "epoch": 5.88, "learning_rate": 3.038562694265123e-05, "loss": 2.6426, "step": 229700 }, { "epoch": 5.89, "learning_rate": 3.037708781637463e-05, "loss": 2.7463, "step": 229800 }, { "epoch": 5.89, "learning_rate": 3.036854869009803e-05, "loss": 2.8526, "step": 229900 }, { "epoch": 5.89, "learning_rate": 3.036000956382143e-05, "loss": 2.9398, "step": 230000 }, { "epoch": 5.89, "learning_rate": 3.0351470437544834e-05, "loss": 2.5734, "step": 230100 }, { "epoch": 5.9, "learning_rate": 3.034293131126823e-05, "loss": 2.7124, "step": 230200 }, { "epoch": 5.9, "learning_rate": 3.0334392184991633e-05, "loss": 2.8195, "step": 230300 }, { "epoch": 5.9, "learning_rate": 3.032585305871503e-05, "loss": 2.7899, "step": 230400 }, { "epoch": 5.9, "learning_rate": 3.0317313932438433e-05, "loss": 2.7026, "step": 230500 }, { "epoch": 5.91, "learning_rate": 3.030877480616183e-05, "loss": 2.6566, "step": 230600 }, { "epoch": 5.91, "learning_rate": 3.0300235679885236e-05, "loss": 2.6287, "step": 230700 }, { "epoch": 5.91, "learning_rate": 3.0291696553608634e-05, "loss": 2.8339, "step": 230800 }, { "epoch": 5.92, "learning_rate": 3.0283157427332036e-05, "loss": 2.8427, "step": 230900 }, { "epoch": 5.92, "learning_rate": 3.0274618301055434e-05, "loss": 2.664, "step": 231000 }, { "epoch": 5.92, "learning_rate": 3.026607917477884e-05, "loss": 2.6706, "step": 231100 }, { "epoch": 5.92, "learning_rate": 3.0257540048502237e-05, "loss": 2.7868, "step": 231200 }, { "epoch": 5.93, "learning_rate": 3.024900092222564e-05, "loss": 2.9513, "step": 231300 }, { "epoch": 5.93, "learning_rate": 3.0240461795949037e-05, "loss": 2.7365, "step": 231400 }, { "epoch": 5.93, "learning_rate": 3.0231922669672442e-05, "loss": 2.5798, "step": 231500 }, { "epoch": 5.93, "learning_rate": 3.022338354339584e-05, "loss": 2.4588, "step": 231600 }, { "epoch": 5.94, "learning_rate": 3.021484441711924e-05, "loss": 2.84, "step": 231700 }, { "epoch": 5.94, "learning_rate": 3.020630529084264e-05, "loss": 2.7311, "step": 231800 }, { "epoch": 5.94, "learning_rate": 3.0197766164566045e-05, "loss": 2.6963, "step": 231900 }, { "epoch": 5.94, "learning_rate": 3.0189227038289443e-05, "loss": 2.7015, "step": 232000 }, { "epoch": 5.95, "learning_rate": 3.0180687912012844e-05, "loss": 2.8284, "step": 232100 }, { "epoch": 5.95, "learning_rate": 3.0172148785736242e-05, "loss": 2.616, "step": 232200 }, { "epoch": 5.95, "learning_rate": 3.0163609659459647e-05, "loss": 2.6611, "step": 232300 }, { "epoch": 5.95, "learning_rate": 3.0155070533183046e-05, "loss": 2.8142, "step": 232400 }, { "epoch": 5.96, "learning_rate": 3.0146531406906447e-05, "loss": 2.6198, "step": 232500 }, { "epoch": 5.96, "learning_rate": 3.0137992280629845e-05, "loss": 2.7892, "step": 232600 }, { "epoch": 5.96, "learning_rate": 3.012945315435325e-05, "loss": 2.7212, "step": 232700 }, { "epoch": 5.96, "learning_rate": 3.0120914028076645e-05, "loss": 2.6871, "step": 232800 }, { "epoch": 5.97, "learning_rate": 3.011237490180005e-05, "loss": 2.6104, "step": 232900 }, { "epoch": 5.97, "learning_rate": 3.0103835775523448e-05, "loss": 2.5973, "step": 233000 }, { "epoch": 5.97, "learning_rate": 3.009529664924685e-05, "loss": 2.7518, "step": 233100 }, { "epoch": 5.97, "learning_rate": 3.0086757522970248e-05, "loss": 2.6271, "step": 233200 }, { "epoch": 5.98, "learning_rate": 3.0078218396693653e-05, "loss": 2.7272, "step": 233300 }, { "epoch": 5.98, "learning_rate": 3.006967927041705e-05, "loss": 2.5538, "step": 233400 }, { "epoch": 5.98, "learning_rate": 3.0061140144140452e-05, "loss": 2.7496, "step": 233500 }, { "epoch": 5.98, "learning_rate": 3.005260101786385e-05, "loss": 2.7422, "step": 233600 }, { "epoch": 5.99, "learning_rate": 3.0044061891587256e-05, "loss": 2.6432, "step": 233700 }, { "epoch": 5.99, "learning_rate": 3.0035522765310654e-05, "loss": 2.6889, "step": 233800 }, { "epoch": 5.99, "learning_rate": 3.0026983639034055e-05, "loss": 2.7468, "step": 233900 }, { "epoch": 5.99, "learning_rate": 3.0018444512757453e-05, "loss": 2.834, "step": 234000 }, { "epoch": 6.0, "learning_rate": 3.000990538648086e-05, "loss": 2.7115, "step": 234100 }, { "epoch": 6.0, "learning_rate": 3.0001366260204257e-05, "loss": 2.5906, "step": 234200 }, { "epoch": 6.0, "learning_rate": 2.9992827133927658e-05, "loss": 2.6469, "step": 234300 }, { "epoch": 6.0, "learning_rate": 2.9984288007651056e-05, "loss": 2.7775, "step": 234400 }, { "epoch": 6.01, "learning_rate": 2.997574888137446e-05, "loss": 2.7041, "step": 234500 }, { "epoch": 6.01, "learning_rate": 2.996720975509786e-05, "loss": 2.7634, "step": 234600 }, { "epoch": 6.01, "learning_rate": 2.995867062882126e-05, "loss": 2.4696, "step": 234700 }, { "epoch": 6.01, "learning_rate": 2.995013150254466e-05, "loss": 2.7226, "step": 234800 }, { "epoch": 6.02, "learning_rate": 2.9941592376268064e-05, "loss": 2.67, "step": 234900 }, { "epoch": 6.02, "learning_rate": 2.9933053249991462e-05, "loss": 2.7104, "step": 235000 }, { "epoch": 6.02, "learning_rate": 2.9924514123714864e-05, "loss": 2.8479, "step": 235100 }, { "epoch": 6.03, "learning_rate": 2.9915974997438262e-05, "loss": 2.7838, "step": 235200 }, { "epoch": 6.03, "learning_rate": 2.9907435871161667e-05, "loss": 2.6987, "step": 235300 }, { "epoch": 6.03, "learning_rate": 2.989889674488506e-05, "loss": 2.6483, "step": 235400 }, { "epoch": 6.03, "learning_rate": 2.9890357618608466e-05, "loss": 2.5661, "step": 235500 }, { "epoch": 6.04, "learning_rate": 2.9881818492331865e-05, "loss": 2.6686, "step": 235600 }, { "epoch": 6.04, "learning_rate": 2.987327936605527e-05, "loss": 2.688, "step": 235700 }, { "epoch": 6.04, "learning_rate": 2.9864740239778664e-05, "loss": 2.76, "step": 235800 }, { "epoch": 6.04, "learning_rate": 2.985620111350207e-05, "loss": 2.6832, "step": 235900 }, { "epoch": 6.05, "learning_rate": 2.9847661987225467e-05, "loss": 2.6808, "step": 236000 }, { "epoch": 6.05, "learning_rate": 2.983912286094887e-05, "loss": 2.7677, "step": 236100 }, { "epoch": 6.05, "learning_rate": 2.9830583734672267e-05, "loss": 2.6441, "step": 236200 }, { "epoch": 6.05, "learning_rate": 2.9822044608395672e-05, "loss": 2.7636, "step": 236300 }, { "epoch": 6.06, "learning_rate": 2.981350548211907e-05, "loss": 2.7828, "step": 236400 }, { "epoch": 6.06, "learning_rate": 2.9804966355842472e-05, "loss": 2.5797, "step": 236500 }, { "epoch": 6.06, "learning_rate": 2.979642722956587e-05, "loss": 2.7867, "step": 236600 }, { "epoch": 6.06, "learning_rate": 2.9787888103289275e-05, "loss": 2.6842, "step": 236700 }, { "epoch": 6.07, "learning_rate": 2.9779348977012673e-05, "loss": 2.7861, "step": 236800 }, { "epoch": 6.07, "learning_rate": 2.9770809850736075e-05, "loss": 2.6882, "step": 236900 }, { "epoch": 6.07, "learning_rate": 2.9762270724459473e-05, "loss": 2.8817, "step": 237000 }, { "epoch": 6.07, "learning_rate": 2.9753731598182878e-05, "loss": 2.9218, "step": 237100 }, { "epoch": 6.08, "learning_rate": 2.9745192471906276e-05, "loss": 2.7078, "step": 237200 }, { "epoch": 6.08, "learning_rate": 2.9736653345629677e-05, "loss": 2.6177, "step": 237300 }, { "epoch": 6.08, "learning_rate": 2.9728114219353076e-05, "loss": 2.667, "step": 237400 }, { "epoch": 6.08, "learning_rate": 2.971957509307648e-05, "loss": 2.8412, "step": 237500 }, { "epoch": 6.09, "learning_rate": 2.971103596679988e-05, "loss": 2.7651, "step": 237600 }, { "epoch": 6.09, "learning_rate": 2.970249684052328e-05, "loss": 2.5657, "step": 237700 }, { "epoch": 6.09, "learning_rate": 2.969395771424668e-05, "loss": 2.4972, "step": 237800 }, { "epoch": 6.09, "learning_rate": 2.9685418587970083e-05, "loss": 2.6563, "step": 237900 }, { "epoch": 6.1, "learning_rate": 2.9676879461693478e-05, "loss": 2.7086, "step": 238000 }, { "epoch": 6.1, "learning_rate": 2.9668340335416883e-05, "loss": 2.4449, "step": 238100 }, { "epoch": 6.1, "learning_rate": 2.965980120914028e-05, "loss": 2.5057, "step": 238200 }, { "epoch": 6.1, "learning_rate": 2.9651262082863686e-05, "loss": 2.7743, "step": 238300 }, { "epoch": 6.11, "learning_rate": 2.964272295658708e-05, "loss": 2.6181, "step": 238400 }, { "epoch": 6.11, "learning_rate": 2.9634183830310486e-05, "loss": 2.6679, "step": 238500 }, { "epoch": 6.11, "learning_rate": 2.9625644704033884e-05, "loss": 2.908, "step": 238600 }, { "epoch": 6.11, "learning_rate": 2.9617105577757286e-05, "loss": 2.4055, "step": 238700 }, { "epoch": 6.12, "learning_rate": 2.9608566451480684e-05, "loss": 2.7305, "step": 238800 }, { "epoch": 6.12, "learning_rate": 2.960002732520409e-05, "loss": 2.7786, "step": 238900 }, { "epoch": 6.12, "learning_rate": 2.9591488198927487e-05, "loss": 2.7557, "step": 239000 }, { "epoch": 6.13, "learning_rate": 2.958294907265089e-05, "loss": 2.6417, "step": 239100 }, { "epoch": 6.13, "learning_rate": 2.9574409946374286e-05, "loss": 2.7198, "step": 239200 }, { "epoch": 6.13, "learning_rate": 2.956587082009769e-05, "loss": 2.7358, "step": 239300 }, { "epoch": 6.13, "learning_rate": 2.955733169382109e-05, "loss": 2.6815, "step": 239400 }, { "epoch": 6.14, "learning_rate": 2.954879256754449e-05, "loss": 2.6953, "step": 239500 }, { "epoch": 6.14, "learning_rate": 2.954025344126789e-05, "loss": 2.8371, "step": 239600 }, { "epoch": 6.14, "learning_rate": 2.9531714314991294e-05, "loss": 2.8867, "step": 239700 }, { "epoch": 6.14, "learning_rate": 2.9523175188714692e-05, "loss": 2.7678, "step": 239800 }, { "epoch": 6.15, "learning_rate": 2.9514636062438094e-05, "loss": 2.4464, "step": 239900 }, { "epoch": 6.15, "learning_rate": 2.9506096936161492e-05, "loss": 2.6759, "step": 240000 }, { "epoch": 6.15, "learning_rate": 2.9497557809884897e-05, "loss": 2.9032, "step": 240100 }, { "epoch": 6.15, "learning_rate": 2.9489018683608295e-05, "loss": 2.7916, "step": 240200 }, { "epoch": 6.16, "learning_rate": 2.9480479557331697e-05, "loss": 2.7683, "step": 240300 }, { "epoch": 6.16, "learning_rate": 2.9471940431055095e-05, "loss": 2.8258, "step": 240400 }, { "epoch": 6.16, "learning_rate": 2.94634013047785e-05, "loss": 2.5918, "step": 240500 }, { "epoch": 6.16, "learning_rate": 2.9454862178501898e-05, "loss": 2.7946, "step": 240600 }, { "epoch": 6.17, "learning_rate": 2.94463230522253e-05, "loss": 2.818, "step": 240700 }, { "epoch": 6.17, "learning_rate": 2.9437783925948698e-05, "loss": 2.6471, "step": 240800 }, { "epoch": 6.17, "learning_rate": 2.9429244799672103e-05, "loss": 2.6668, "step": 240900 }, { "epoch": 6.17, "learning_rate": 2.9420705673395497e-05, "loss": 2.7814, "step": 241000 }, { "epoch": 6.18, "learning_rate": 2.9412166547118902e-05, "loss": 2.7499, "step": 241100 }, { "epoch": 6.18, "learning_rate": 2.94036274208423e-05, "loss": 2.7778, "step": 241200 }, { "epoch": 6.18, "learning_rate": 2.9395088294565702e-05, "loss": 2.6787, "step": 241300 }, { "epoch": 6.18, "learning_rate": 2.93865491682891e-05, "loss": 2.7438, "step": 241400 }, { "epoch": 6.19, "learning_rate": 2.9378010042012505e-05, "loss": 2.7875, "step": 241500 }, { "epoch": 6.19, "learning_rate": 2.9369470915735903e-05, "loss": 2.6229, "step": 241600 }, { "epoch": 6.19, "learning_rate": 2.9360931789459305e-05, "loss": 2.736, "step": 241700 }, { "epoch": 6.19, "learning_rate": 2.9352392663182703e-05, "loss": 2.6908, "step": 241800 }, { "epoch": 6.2, "learning_rate": 2.9343853536906108e-05, "loss": 2.6863, "step": 241900 }, { "epoch": 6.2, "learning_rate": 2.9335314410629506e-05, "loss": 2.6862, "step": 242000 }, { "epoch": 6.2, "learning_rate": 2.9326775284352908e-05, "loss": 2.7347, "step": 242100 }, { "epoch": 6.2, "learning_rate": 2.9318236158076306e-05, "loss": 2.615, "step": 242200 }, { "epoch": 6.21, "learning_rate": 2.930969703179971e-05, "loss": 2.7367, "step": 242300 }, { "epoch": 6.21, "learning_rate": 2.930115790552311e-05, "loss": 2.7765, "step": 242400 }, { "epoch": 6.21, "learning_rate": 2.929261877924651e-05, "loss": 2.806, "step": 242500 }, { "epoch": 6.21, "learning_rate": 2.928407965296991e-05, "loss": 2.7895, "step": 242600 }, { "epoch": 6.22, "learning_rate": 2.9275540526693314e-05, "loss": 2.7328, "step": 242700 }, { "epoch": 6.22, "learning_rate": 2.9267001400416712e-05, "loss": 2.5228, "step": 242800 }, { "epoch": 6.22, "learning_rate": 2.9258462274140113e-05, "loss": 2.6651, "step": 242900 }, { "epoch": 6.23, "learning_rate": 2.924992314786351e-05, "loss": 2.7362, "step": 243000 }, { "epoch": 6.23, "learning_rate": 2.9241384021586916e-05, "loss": 2.8293, "step": 243100 }, { "epoch": 6.23, "learning_rate": 2.9232844895310315e-05, "loss": 2.7309, "step": 243200 }, { "epoch": 6.23, "learning_rate": 2.9224305769033716e-05, "loss": 2.5739, "step": 243300 }, { "epoch": 6.24, "learning_rate": 2.9215766642757114e-05, "loss": 2.6963, "step": 243400 }, { "epoch": 6.24, "learning_rate": 2.920722751648052e-05, "loss": 2.8438, "step": 243500 }, { "epoch": 6.24, "learning_rate": 2.9198688390203914e-05, "loss": 2.5773, "step": 243600 }, { "epoch": 6.24, "learning_rate": 2.919014926392732e-05, "loss": 2.6294, "step": 243700 }, { "epoch": 6.25, "learning_rate": 2.9181610137650717e-05, "loss": 2.5419, "step": 243800 }, { "epoch": 6.25, "learning_rate": 2.917307101137412e-05, "loss": 2.6182, "step": 243900 }, { "epoch": 6.25, "learning_rate": 2.9164531885097517e-05, "loss": 2.6046, "step": 244000 }, { "epoch": 6.25, "learning_rate": 2.915599275882092e-05, "loss": 2.6727, "step": 244100 }, { "epoch": 6.26, "learning_rate": 2.914745363254432e-05, "loss": 2.4974, "step": 244200 }, { "epoch": 6.26, "learning_rate": 2.913891450626772e-05, "loss": 2.6973, "step": 244300 }, { "epoch": 6.26, "learning_rate": 2.913037537999112e-05, "loss": 2.7346, "step": 244400 }, { "epoch": 6.26, "learning_rate": 2.9121836253714524e-05, "loss": 2.6768, "step": 244500 }, { "epoch": 6.27, "learning_rate": 2.9113297127437923e-05, "loss": 2.6179, "step": 244600 }, { "epoch": 6.27, "learning_rate": 2.9104758001161324e-05, "loss": 2.651, "step": 244700 }, { "epoch": 6.27, "learning_rate": 2.9096218874884722e-05, "loss": 2.6308, "step": 244800 }, { "epoch": 6.27, "learning_rate": 2.9087679748608127e-05, "loss": 2.9426, "step": 244900 }, { "epoch": 6.28, "learning_rate": 2.9079140622331525e-05, "loss": 2.8143, "step": 245000 }, { "epoch": 6.28, "learning_rate": 2.9070601496054927e-05, "loss": 2.5903, "step": 245100 }, { "epoch": 6.28, "learning_rate": 2.9062062369778325e-05, "loss": 2.7973, "step": 245200 }, { "epoch": 6.28, "learning_rate": 2.905352324350173e-05, "loss": 2.486, "step": 245300 }, { "epoch": 6.29, "learning_rate": 2.9044984117225128e-05, "loss": 2.7287, "step": 245400 }, { "epoch": 6.29, "learning_rate": 2.9036444990948526e-05, "loss": 2.7824, "step": 245500 }, { "epoch": 6.29, "learning_rate": 2.9027905864671928e-05, "loss": 2.6783, "step": 245600 }, { "epoch": 6.29, "learning_rate": 2.9019366738395326e-05, "loss": 2.7329, "step": 245700 }, { "epoch": 6.3, "learning_rate": 2.901082761211873e-05, "loss": 2.5198, "step": 245800 }, { "epoch": 6.3, "learning_rate": 2.9002288485842126e-05, "loss": 2.7395, "step": 245900 }, { "epoch": 6.3, "learning_rate": 2.899374935956553e-05, "loss": 2.7563, "step": 246000 }, { "epoch": 6.3, "learning_rate": 2.898521023328893e-05, "loss": 2.6676, "step": 246100 }, { "epoch": 6.31, "learning_rate": 2.897667110701233e-05, "loss": 2.6968, "step": 246200 }, { "epoch": 6.31, "learning_rate": 2.896813198073573e-05, "loss": 2.7464, "step": 246300 }, { "epoch": 6.31, "learning_rate": 2.8959592854459134e-05, "loss": 2.6647, "step": 246400 }, { "epoch": 6.31, "learning_rate": 2.8951053728182532e-05, "loss": 2.8056, "step": 246500 }, { "epoch": 6.32, "learning_rate": 2.8942514601905933e-05, "loss": 2.7379, "step": 246600 }, { "epoch": 6.32, "learning_rate": 2.893397547562933e-05, "loss": 2.6462, "step": 246700 }, { "epoch": 6.32, "learning_rate": 2.8925436349352736e-05, "loss": 2.7145, "step": 246800 }, { "epoch": 6.32, "learning_rate": 2.8916897223076135e-05, "loss": 2.7956, "step": 246900 }, { "epoch": 6.33, "learning_rate": 2.8908358096799536e-05, "loss": 2.8627, "step": 247000 }, { "epoch": 6.33, "learning_rate": 2.8899818970522934e-05, "loss": 2.736, "step": 247100 }, { "epoch": 6.33, "learning_rate": 2.889127984424634e-05, "loss": 2.6867, "step": 247200 }, { "epoch": 6.34, "learning_rate": 2.8882740717969737e-05, "loss": 2.7596, "step": 247300 }, { "epoch": 6.34, "learning_rate": 2.887420159169314e-05, "loss": 2.6373, "step": 247400 }, { "epoch": 6.34, "learning_rate": 2.8865662465416537e-05, "loss": 2.5411, "step": 247500 }, { "epoch": 6.34, "learning_rate": 2.8857123339139942e-05, "loss": 2.7351, "step": 247600 }, { "epoch": 6.35, "learning_rate": 2.884858421286334e-05, "loss": 2.6872, "step": 247700 }, { "epoch": 6.35, "learning_rate": 2.8840045086586742e-05, "loss": 2.6734, "step": 247800 }, { "epoch": 6.35, "learning_rate": 2.883150596031014e-05, "loss": 2.681, "step": 247900 }, { "epoch": 6.35, "learning_rate": 2.8822966834033545e-05, "loss": 2.5425, "step": 248000 }, { "epoch": 6.36, "learning_rate": 2.8814427707756943e-05, "loss": 2.7357, "step": 248100 }, { "epoch": 6.36, "learning_rate": 2.8805888581480345e-05, "loss": 2.6655, "step": 248200 }, { "epoch": 6.36, "learning_rate": 2.8797349455203743e-05, "loss": 2.7599, "step": 248300 }, { "epoch": 6.36, "learning_rate": 2.8788810328927148e-05, "loss": 2.4836, "step": 248400 }, { "epoch": 6.37, "learning_rate": 2.8780271202650542e-05, "loss": 2.4861, "step": 248500 }, { "epoch": 6.37, "learning_rate": 2.8771732076373947e-05, "loss": 2.5584, "step": 248600 }, { "epoch": 6.37, "learning_rate": 2.8763192950097345e-05, "loss": 2.6304, "step": 248700 }, { "epoch": 6.37, "learning_rate": 2.8754653823820747e-05, "loss": 2.534, "step": 248800 }, { "epoch": 6.38, "learning_rate": 2.8746114697544145e-05, "loss": 2.6754, "step": 248900 }, { "epoch": 6.38, "learning_rate": 2.873757557126755e-05, "loss": 2.763, "step": 249000 }, { "epoch": 6.38, "learning_rate": 2.8729036444990948e-05, "loss": 2.7081, "step": 249100 }, { "epoch": 6.38, "learning_rate": 2.872049731871435e-05, "loss": 2.7015, "step": 249200 }, { "epoch": 6.39, "learning_rate": 2.8711958192437748e-05, "loss": 2.5221, "step": 249300 }, { "epoch": 6.39, "learning_rate": 2.8703419066161153e-05, "loss": 2.7446, "step": 249400 }, { "epoch": 6.39, "learning_rate": 2.869487993988455e-05, "loss": 2.7252, "step": 249500 }, { "epoch": 6.39, "learning_rate": 2.8686340813607953e-05, "loss": 2.7931, "step": 249600 }, { "epoch": 6.4, "learning_rate": 2.867780168733135e-05, "loss": 2.5381, "step": 249700 }, { "epoch": 6.4, "learning_rate": 2.8669262561054756e-05, "loss": 2.6714, "step": 249800 }, { "epoch": 6.4, "learning_rate": 2.8660723434778154e-05, "loss": 2.5294, "step": 249900 }, { "epoch": 6.4, "learning_rate": 2.8652184308501555e-05, "loss": 2.6885, "step": 250000 }, { "epoch": 6.41, "learning_rate": 2.8643645182224954e-05, "loss": 2.7648, "step": 250100 }, { "epoch": 6.41, "learning_rate": 2.863510605594836e-05, "loss": 2.6819, "step": 250200 }, { "epoch": 6.41, "learning_rate": 2.8626566929671757e-05, "loss": 2.5423, "step": 250300 }, { "epoch": 6.41, "learning_rate": 2.8618027803395158e-05, "loss": 2.5962, "step": 250400 }, { "epoch": 6.42, "learning_rate": 2.8609488677118556e-05, "loss": 2.5923, "step": 250500 }, { "epoch": 6.42, "learning_rate": 2.860094955084196e-05, "loss": 2.6878, "step": 250600 }, { "epoch": 6.42, "learning_rate": 2.859241042456536e-05, "loss": 2.7258, "step": 250700 }, { "epoch": 6.42, "learning_rate": 2.858387129828876e-05, "loss": 2.7176, "step": 250800 }, { "epoch": 6.43, "learning_rate": 2.857533217201216e-05, "loss": 2.7349, "step": 250900 }, { "epoch": 6.43, "learning_rate": 2.8566793045735564e-05, "loss": 2.5558, "step": 251000 }, { "epoch": 6.43, "learning_rate": 2.855825391945896e-05, "loss": 2.6889, "step": 251100 }, { "epoch": 6.44, "learning_rate": 2.8549714793182364e-05, "loss": 2.7017, "step": 251200 }, { "epoch": 6.44, "learning_rate": 2.8541175666905762e-05, "loss": 2.7386, "step": 251300 }, { "epoch": 6.44, "learning_rate": 2.8532636540629164e-05, "loss": 2.686, "step": 251400 }, { "epoch": 6.44, "learning_rate": 2.8524097414352562e-05, "loss": 2.8088, "step": 251500 }, { "epoch": 6.45, "learning_rate": 2.8515558288075967e-05, "loss": 2.6469, "step": 251600 }, { "epoch": 6.45, "learning_rate": 2.8507019161799365e-05, "loss": 2.7537, "step": 251700 }, { "epoch": 6.45, "learning_rate": 2.8498480035522766e-05, "loss": 2.7216, "step": 251800 }, { "epoch": 6.45, "learning_rate": 2.8489940909246165e-05, "loss": 2.6939, "step": 251900 }, { "epoch": 6.46, "learning_rate": 2.848140178296957e-05, "loss": 2.5937, "step": 252000 }, { "epoch": 6.46, "learning_rate": 2.8472862656692968e-05, "loss": 2.7772, "step": 252100 }, { "epoch": 6.46, "learning_rate": 2.846432353041637e-05, "loss": 2.5186, "step": 252200 }, { "epoch": 6.46, "learning_rate": 2.8455784404139767e-05, "loss": 2.5647, "step": 252300 }, { "epoch": 6.47, "learning_rate": 2.8447245277863172e-05, "loss": 2.7313, "step": 252400 }, { "epoch": 6.47, "learning_rate": 2.843870615158657e-05, "loss": 2.8239, "step": 252500 }, { "epoch": 6.47, "learning_rate": 2.8430167025309972e-05, "loss": 2.6443, "step": 252600 }, { "epoch": 6.47, "learning_rate": 2.842162789903337e-05, "loss": 2.6207, "step": 252700 }, { "epoch": 6.48, "learning_rate": 2.8413088772756775e-05, "loss": 2.5298, "step": 252800 }, { "epoch": 6.48, "learning_rate": 2.8404549646480173e-05, "loss": 2.7086, "step": 252900 }, { "epoch": 6.48, "learning_rate": 2.8396010520203575e-05, "loss": 2.7574, "step": 253000 }, { "epoch": 6.48, "learning_rate": 2.8387471393926973e-05, "loss": 2.7623, "step": 253100 }, { "epoch": 6.49, "learning_rate": 2.8378932267650378e-05, "loss": 2.5917, "step": 253200 }, { "epoch": 6.49, "learning_rate": 2.8370393141373776e-05, "loss": 2.3612, "step": 253300 }, { "epoch": 6.49, "learning_rate": 2.8361854015097178e-05, "loss": 2.4796, "step": 253400 }, { "epoch": 6.49, "learning_rate": 2.8353314888820576e-05, "loss": 2.4856, "step": 253500 }, { "epoch": 6.5, "learning_rate": 2.834477576254398e-05, "loss": 2.5118, "step": 253600 }, { "epoch": 6.5, "learning_rate": 2.8336236636267375e-05, "loss": 2.663, "step": 253700 }, { "epoch": 6.5, "learning_rate": 2.832769750999078e-05, "loss": 2.6695, "step": 253800 }, { "epoch": 6.5, "learning_rate": 2.831915838371418e-05, "loss": 2.4447, "step": 253900 }, { "epoch": 6.51, "learning_rate": 2.831061925743758e-05, "loss": 2.6524, "step": 254000 }, { "epoch": 6.51, "learning_rate": 2.8302080131160978e-05, "loss": 2.6497, "step": 254100 }, { "epoch": 6.51, "learning_rate": 2.8293541004884383e-05, "loss": 2.6326, "step": 254200 }, { "epoch": 6.51, "learning_rate": 2.828500187860778e-05, "loss": 2.6074, "step": 254300 }, { "epoch": 6.52, "learning_rate": 2.8276462752331183e-05, "loss": 2.4123, "step": 254400 }, { "epoch": 6.52, "learning_rate": 2.826792362605458e-05, "loss": 2.6305, "step": 254500 }, { "epoch": 6.52, "learning_rate": 2.8259384499777986e-05, "loss": 2.5838, "step": 254600 }, { "epoch": 6.52, "learning_rate": 2.8250845373501384e-05, "loss": 2.6634, "step": 254700 }, { "epoch": 6.53, "learning_rate": 2.8242306247224786e-05, "loss": 2.66, "step": 254800 }, { "epoch": 6.53, "learning_rate": 2.8233767120948184e-05, "loss": 2.7131, "step": 254900 }, { "epoch": 6.53, "learning_rate": 2.822522799467159e-05, "loss": 2.5279, "step": 255000 }, { "epoch": 6.53, "learning_rate": 2.8216688868394987e-05, "loss": 2.6542, "step": 255100 }, { "epoch": 6.54, "learning_rate": 2.820814974211839e-05, "loss": 2.4608, "step": 255200 }, { "epoch": 6.54, "learning_rate": 2.8199610615841787e-05, "loss": 2.6652, "step": 255300 }, { "epoch": 6.54, "learning_rate": 2.819107148956519e-05, "loss": 2.5986, "step": 255400 }, { "epoch": 6.55, "learning_rate": 2.818253236328859e-05, "loss": 2.6741, "step": 255500 }, { "epoch": 6.55, "learning_rate": 2.817399323701199e-05, "loss": 2.7155, "step": 255600 }, { "epoch": 6.55, "learning_rate": 2.816545411073539e-05, "loss": 2.577, "step": 255700 }, { "epoch": 6.55, "learning_rate": 2.8156914984458794e-05, "loss": 2.4684, "step": 255800 }, { "epoch": 6.56, "learning_rate": 2.8148375858182193e-05, "loss": 2.631, "step": 255900 }, { "epoch": 6.56, "learning_rate": 2.8139836731905594e-05, "loss": 2.8226, "step": 256000 }, { "epoch": 6.56, "learning_rate": 2.8131297605628992e-05, "loss": 2.7944, "step": 256100 }, { "epoch": 6.56, "learning_rate": 2.8122758479352397e-05, "loss": 2.5859, "step": 256200 }, { "epoch": 6.57, "learning_rate": 2.8114219353075792e-05, "loss": 2.6318, "step": 256300 }, { "epoch": 6.57, "learning_rate": 2.8105680226799197e-05, "loss": 2.6843, "step": 256400 }, { "epoch": 6.57, "learning_rate": 2.8097141100522595e-05, "loss": 2.6963, "step": 256500 }, { "epoch": 6.57, "learning_rate": 2.8088601974245997e-05, "loss": 2.6305, "step": 256600 }, { "epoch": 6.58, "learning_rate": 2.8080062847969395e-05, "loss": 2.6668, "step": 256700 }, { "epoch": 6.58, "learning_rate": 2.80715237216928e-05, "loss": 2.537, "step": 256800 }, { "epoch": 6.58, "learning_rate": 2.8062984595416198e-05, "loss": 2.7308, "step": 256900 }, { "epoch": 6.58, "learning_rate": 2.80544454691396e-05, "loss": 2.7153, "step": 257000 }, { "epoch": 6.59, "learning_rate": 2.8045906342862998e-05, "loss": 2.6395, "step": 257100 }, { "epoch": 6.59, "learning_rate": 2.8037367216586403e-05, "loss": 2.4912, "step": 257200 }, { "epoch": 6.59, "learning_rate": 2.80288280903098e-05, "loss": 2.6566, "step": 257300 }, { "epoch": 6.59, "learning_rate": 2.8020288964033202e-05, "loss": 2.5217, "step": 257400 }, { "epoch": 6.6, "learning_rate": 2.80117498377566e-05, "loss": 2.7992, "step": 257500 }, { "epoch": 6.6, "learning_rate": 2.8003210711480005e-05, "loss": 2.7055, "step": 257600 }, { "epoch": 6.6, "learning_rate": 2.7994671585203404e-05, "loss": 2.5757, "step": 257700 }, { "epoch": 6.6, "learning_rate": 2.7986132458926805e-05, "loss": 2.8099, "step": 257800 }, { "epoch": 6.61, "learning_rate": 2.7977593332650203e-05, "loss": 2.5271, "step": 257900 }, { "epoch": 6.61, "learning_rate": 2.7969054206373608e-05, "loss": 2.661, "step": 258000 }, { "epoch": 6.61, "learning_rate": 2.7960515080097006e-05, "loss": 2.6143, "step": 258100 }, { "epoch": 6.61, "learning_rate": 2.7951975953820408e-05, "loss": 2.5266, "step": 258200 }, { "epoch": 6.62, "learning_rate": 2.7943436827543806e-05, "loss": 2.6032, "step": 258300 }, { "epoch": 6.62, "learning_rate": 2.793489770126721e-05, "loss": 2.5501, "step": 258400 }, { "epoch": 6.62, "learning_rate": 2.792635857499061e-05, "loss": 2.6541, "step": 258500 }, { "epoch": 6.62, "learning_rate": 2.791781944871401e-05, "loss": 2.6406, "step": 258600 }, { "epoch": 6.63, "learning_rate": 2.790928032243741e-05, "loss": 2.5852, "step": 258700 }, { "epoch": 6.63, "learning_rate": 2.7900741196160814e-05, "loss": 2.6384, "step": 258800 }, { "epoch": 6.63, "learning_rate": 2.789220206988421e-05, "loss": 2.5915, "step": 258900 }, { "epoch": 6.63, "learning_rate": 2.7883662943607613e-05, "loss": 2.6916, "step": 259000 }, { "epoch": 6.64, "learning_rate": 2.787512381733101e-05, "loss": 2.6679, "step": 259100 }, { "epoch": 6.64, "learning_rate": 2.7866584691054413e-05, "loss": 2.732, "step": 259200 }, { "epoch": 6.64, "learning_rate": 2.785804556477781e-05, "loss": 2.7541, "step": 259300 }, { "epoch": 6.65, "learning_rate": 2.7849506438501216e-05, "loss": 2.4927, "step": 259400 }, { "epoch": 6.65, "learning_rate": 2.7840967312224614e-05, "loss": 2.5912, "step": 259500 }, { "epoch": 6.65, "learning_rate": 2.7832428185948016e-05, "loss": 2.5699, "step": 259600 }, { "epoch": 6.65, "learning_rate": 2.7823889059671414e-05, "loss": 2.6161, "step": 259700 }, { "epoch": 6.66, "learning_rate": 2.781534993339482e-05, "loss": 2.6555, "step": 259800 }, { "epoch": 6.66, "learning_rate": 2.7806810807118217e-05, "loss": 2.586, "step": 259900 }, { "epoch": 6.66, "learning_rate": 2.779827168084162e-05, "loss": 2.817, "step": 260000 }, { "epoch": 6.66, "learning_rate": 2.7789732554565017e-05, "loss": 2.7655, "step": 260100 }, { "epoch": 6.67, "learning_rate": 2.7781193428288422e-05, "loss": 2.6764, "step": 260200 }, { "epoch": 6.67, "learning_rate": 2.777265430201182e-05, "loss": 2.7336, "step": 260300 }, { "epoch": 6.67, "learning_rate": 2.776411517573522e-05, "loss": 2.7111, "step": 260400 }, { "epoch": 6.67, "learning_rate": 2.775557604945862e-05, "loss": 2.6737, "step": 260500 }, { "epoch": 6.68, "learning_rate": 2.7747036923182025e-05, "loss": 2.6634, "step": 260600 }, { "epoch": 6.68, "learning_rate": 2.7738497796905423e-05, "loss": 2.6087, "step": 260700 }, { "epoch": 6.68, "learning_rate": 2.7729958670628824e-05, "loss": 2.5933, "step": 260800 }, { "epoch": 6.68, "learning_rate": 2.7721419544352223e-05, "loss": 2.5006, "step": 260900 }, { "epoch": 6.69, "learning_rate": 2.7712880418075627e-05, "loss": 2.6066, "step": 261000 }, { "epoch": 6.69, "learning_rate": 2.7704341291799026e-05, "loss": 2.7219, "step": 261100 }, { "epoch": 6.69, "learning_rate": 2.7695802165522427e-05, "loss": 2.8082, "step": 261200 }, { "epoch": 6.69, "learning_rate": 2.7687263039245825e-05, "loss": 2.4713, "step": 261300 }, { "epoch": 6.7, "learning_rate": 2.767872391296923e-05, "loss": 2.5928, "step": 261400 }, { "epoch": 6.7, "learning_rate": 2.7670184786692625e-05, "loss": 2.4251, "step": 261500 }, { "epoch": 6.7, "learning_rate": 2.766164566041603e-05, "loss": 2.4727, "step": 261600 }, { "epoch": 6.7, "learning_rate": 2.7653106534139428e-05, "loss": 2.6667, "step": 261700 }, { "epoch": 6.71, "learning_rate": 2.764456740786283e-05, "loss": 2.6748, "step": 261800 }, { "epoch": 6.71, "learning_rate": 2.7636028281586228e-05, "loss": 2.7072, "step": 261900 }, { "epoch": 6.71, "learning_rate": 2.7627489155309633e-05, "loss": 2.6406, "step": 262000 }, { "epoch": 6.71, "learning_rate": 2.761895002903303e-05, "loss": 2.766, "step": 262100 }, { "epoch": 6.72, "learning_rate": 2.7610410902756433e-05, "loss": 2.6034, "step": 262200 }, { "epoch": 6.72, "learning_rate": 2.760187177647983e-05, "loss": 2.6623, "step": 262300 }, { "epoch": 6.72, "learning_rate": 2.7593332650203236e-05, "loss": 2.7657, "step": 262400 }, { "epoch": 6.72, "learning_rate": 2.7584793523926634e-05, "loss": 2.6565, "step": 262500 }, { "epoch": 6.73, "learning_rate": 2.7576254397650035e-05, "loss": 2.4814, "step": 262600 }, { "epoch": 6.73, "learning_rate": 2.7567715271373433e-05, "loss": 2.6561, "step": 262700 }, { "epoch": 6.73, "learning_rate": 2.755917614509684e-05, "loss": 2.7845, "step": 262800 }, { "epoch": 6.73, "learning_rate": 2.7550637018820237e-05, "loss": 2.7197, "step": 262900 }, { "epoch": 6.74, "learning_rate": 2.7542097892543635e-05, "loss": 2.4288, "step": 263000 }, { "epoch": 6.74, "learning_rate": 2.7533558766267036e-05, "loss": 2.6412, "step": 263100 }, { "epoch": 6.74, "learning_rate": 2.7525019639990434e-05, "loss": 2.6682, "step": 263200 }, { "epoch": 6.75, "learning_rate": 2.751648051371384e-05, "loss": 2.6347, "step": 263300 }, { "epoch": 6.75, "learning_rate": 2.7507941387437238e-05, "loss": 2.6485, "step": 263400 }, { "epoch": 6.75, "learning_rate": 2.749940226116064e-05, "loss": 2.7453, "step": 263500 }, { "epoch": 6.75, "learning_rate": 2.7490863134884037e-05, "loss": 2.5326, "step": 263600 }, { "epoch": 6.76, "learning_rate": 2.7482324008607442e-05, "loss": 2.7145, "step": 263700 }, { "epoch": 6.76, "learning_rate": 2.7473784882330837e-05, "loss": 2.6467, "step": 263800 }, { "epoch": 6.76, "learning_rate": 2.7465245756054242e-05, "loss": 2.7114, "step": 263900 }, { "epoch": 6.76, "learning_rate": 2.745670662977764e-05, "loss": 2.6683, "step": 264000 }, { "epoch": 6.77, "learning_rate": 2.744816750350104e-05, "loss": 2.5731, "step": 264100 }, { "epoch": 6.77, "learning_rate": 2.743962837722444e-05, "loss": 2.7838, "step": 264200 }, { "epoch": 6.77, "learning_rate": 2.7431089250947845e-05, "loss": 2.6185, "step": 264300 }, { "epoch": 6.77, "learning_rate": 2.7422550124671243e-05, "loss": 2.6239, "step": 264400 }, { "epoch": 6.78, "learning_rate": 2.7414010998394644e-05, "loss": 2.5764, "step": 264500 }, { "epoch": 6.78, "learning_rate": 2.7405471872118043e-05, "loss": 2.6734, "step": 264600 }, { "epoch": 6.78, "learning_rate": 2.7396932745841448e-05, "loss": 2.7873, "step": 264700 }, { "epoch": 6.78, "learning_rate": 2.7388393619564846e-05, "loss": 2.5816, "step": 264800 }, { "epoch": 6.79, "learning_rate": 2.7379854493288247e-05, "loss": 2.5118, "step": 264900 }, { "epoch": 6.79, "learning_rate": 2.7371315367011645e-05, "loss": 2.613, "step": 265000 }, { "epoch": 6.79, "learning_rate": 2.736277624073505e-05, "loss": 2.6056, "step": 265100 }, { "epoch": 6.79, "learning_rate": 2.735423711445845e-05, "loss": 2.6834, "step": 265200 }, { "epoch": 6.8, "learning_rate": 2.734569798818185e-05, "loss": 2.6059, "step": 265300 }, { "epoch": 6.8, "learning_rate": 2.7337158861905248e-05, "loss": 2.7789, "step": 265400 }, { "epoch": 6.8, "learning_rate": 2.7328619735628653e-05, "loss": 2.679, "step": 265500 }, { "epoch": 6.8, "learning_rate": 2.732008060935205e-05, "loss": 2.7703, "step": 265600 }, { "epoch": 6.81, "learning_rate": 2.7311541483075453e-05, "loss": 2.6233, "step": 265700 }, { "epoch": 6.81, "learning_rate": 2.730300235679885e-05, "loss": 2.7459, "step": 265800 }, { "epoch": 6.81, "learning_rate": 2.7294463230522256e-05, "loss": 2.5026, "step": 265900 }, { "epoch": 6.81, "learning_rate": 2.7285924104245654e-05, "loss": 2.6283, "step": 266000 }, { "epoch": 6.82, "learning_rate": 2.7277384977969056e-05, "loss": 2.621, "step": 266100 }, { "epoch": 6.82, "learning_rate": 2.7268845851692454e-05, "loss": 2.7844, "step": 266200 }, { "epoch": 6.82, "learning_rate": 2.726030672541586e-05, "loss": 2.895, "step": 266300 }, { "epoch": 6.82, "learning_rate": 2.7251767599139253e-05, "loss": 2.4861, "step": 266400 }, { "epoch": 6.83, "learning_rate": 2.724322847286266e-05, "loss": 2.6531, "step": 266500 }, { "epoch": 6.83, "learning_rate": 2.7234689346586057e-05, "loss": 2.6349, "step": 266600 }, { "epoch": 6.83, "learning_rate": 2.722615022030946e-05, "loss": 2.6672, "step": 266700 }, { "epoch": 6.83, "learning_rate": 2.7217611094032856e-05, "loss": 2.5503, "step": 266800 }, { "epoch": 6.84, "learning_rate": 2.720907196775626e-05, "loss": 2.4387, "step": 266900 }, { "epoch": 6.84, "learning_rate": 2.720053284147966e-05, "loss": 2.629, "step": 267000 }, { "epoch": 6.84, "learning_rate": 2.719199371520306e-05, "loss": 2.7743, "step": 267100 }, { "epoch": 6.84, "learning_rate": 2.718345458892646e-05, "loss": 2.6763, "step": 267200 }, { "epoch": 6.85, "learning_rate": 2.7174915462649864e-05, "loss": 2.6692, "step": 267300 }, { "epoch": 6.85, "learning_rate": 2.7166376336373262e-05, "loss": 2.6085, "step": 267400 }, { "epoch": 6.85, "learning_rate": 2.7157837210096664e-05, "loss": 2.8022, "step": 267500 }, { "epoch": 6.86, "learning_rate": 2.7149298083820062e-05, "loss": 2.4493, "step": 267600 }, { "epoch": 6.86, "learning_rate": 2.7140758957543467e-05, "loss": 2.4776, "step": 267700 }, { "epoch": 6.86, "learning_rate": 2.7132219831266865e-05, "loss": 2.5499, "step": 267800 }, { "epoch": 6.86, "learning_rate": 2.7123680704990267e-05, "loss": 2.6288, "step": 267900 }, { "epoch": 6.87, "learning_rate": 2.7115141578713665e-05, "loss": 2.6296, "step": 268000 }, { "epoch": 6.87, "learning_rate": 2.710660245243707e-05, "loss": 2.6508, "step": 268100 }, { "epoch": 6.87, "learning_rate": 2.7098063326160468e-05, "loss": 2.4546, "step": 268200 }, { "epoch": 6.87, "learning_rate": 2.708952419988387e-05, "loss": 2.6228, "step": 268300 }, { "epoch": 6.88, "learning_rate": 2.7080985073607268e-05, "loss": 2.6905, "step": 268400 }, { "epoch": 6.88, "learning_rate": 2.7072445947330672e-05, "loss": 2.5638, "step": 268500 }, { "epoch": 6.88, "learning_rate": 2.706390682105407e-05, "loss": 2.5877, "step": 268600 }, { "epoch": 6.88, "learning_rate": 2.7055367694777472e-05, "loss": 2.7355, "step": 268700 }, { "epoch": 6.89, "learning_rate": 2.704682856850087e-05, "loss": 2.5392, "step": 268800 }, { "epoch": 6.89, "learning_rate": 2.7038289442224275e-05, "loss": 2.564, "step": 268900 }, { "epoch": 6.89, "learning_rate": 2.702975031594767e-05, "loss": 2.7087, "step": 269000 }, { "epoch": 6.89, "learning_rate": 2.7021211189671075e-05, "loss": 2.7114, "step": 269100 }, { "epoch": 6.9, "learning_rate": 2.7012672063394473e-05, "loss": 2.7633, "step": 269200 }, { "epoch": 6.9, "learning_rate": 2.7004132937117878e-05, "loss": 2.6081, "step": 269300 }, { "epoch": 6.9, "learning_rate": 2.6995593810841273e-05, "loss": 2.5344, "step": 269400 }, { "epoch": 6.9, "learning_rate": 2.6987054684564678e-05, "loss": 2.7626, "step": 269500 }, { "epoch": 6.91, "learning_rate": 2.6978515558288076e-05, "loss": 2.5426, "step": 269600 }, { "epoch": 6.91, "learning_rate": 2.6969976432011477e-05, "loss": 2.7273, "step": 269700 }, { "epoch": 6.91, "learning_rate": 2.6961437305734876e-05, "loss": 2.5759, "step": 269800 }, { "epoch": 6.91, "learning_rate": 2.695289817945828e-05, "loss": 2.5017, "step": 269900 }, { "epoch": 6.92, "learning_rate": 2.694435905318168e-05, "loss": 2.592, "step": 270000 }, { "epoch": 6.92, "learning_rate": 2.693581992690508e-05, "loss": 2.4799, "step": 270100 }, { "epoch": 6.92, "learning_rate": 2.692728080062848e-05, "loss": 2.6806, "step": 270200 }, { "epoch": 6.92, "learning_rate": 2.6918741674351883e-05, "loss": 2.6202, "step": 270300 }, { "epoch": 6.93, "learning_rate": 2.691020254807528e-05, "loss": 2.5736, "step": 270400 }, { "epoch": 6.93, "learning_rate": 2.6901663421798683e-05, "loss": 2.5092, "step": 270500 }, { "epoch": 6.93, "learning_rate": 2.689312429552208e-05, "loss": 2.5518, "step": 270600 }, { "epoch": 6.93, "learning_rate": 2.6884585169245486e-05, "loss": 2.7833, "step": 270700 }, { "epoch": 6.94, "learning_rate": 2.6876046042968884e-05, "loss": 2.4738, "step": 270800 }, { "epoch": 6.94, "learning_rate": 2.6867506916692286e-05, "loss": 2.6425, "step": 270900 }, { "epoch": 6.94, "learning_rate": 2.6858967790415684e-05, "loss": 2.7705, "step": 271000 }, { "epoch": 6.94, "learning_rate": 2.685042866413909e-05, "loss": 2.5649, "step": 271100 }, { "epoch": 6.95, "learning_rate": 2.6841889537862487e-05, "loss": 2.6953, "step": 271200 }, { "epoch": 6.95, "learning_rate": 2.683335041158589e-05, "loss": 2.5427, "step": 271300 }, { "epoch": 6.95, "learning_rate": 2.6824811285309287e-05, "loss": 2.7958, "step": 271400 }, { "epoch": 6.96, "learning_rate": 2.6816272159032692e-05, "loss": 2.6604, "step": 271500 }, { "epoch": 6.96, "learning_rate": 2.680773303275609e-05, "loss": 2.6293, "step": 271600 }, { "epoch": 6.96, "learning_rate": 2.679919390647949e-05, "loss": 2.6232, "step": 271700 }, { "epoch": 6.96, "learning_rate": 2.679065478020289e-05, "loss": 2.6486, "step": 271800 }, { "epoch": 6.97, "learning_rate": 2.6782115653926295e-05, "loss": 2.5863, "step": 271900 }, { "epoch": 6.97, "learning_rate": 2.677357652764969e-05, "loss": 2.5454, "step": 272000 }, { "epoch": 6.97, "learning_rate": 2.6765037401373094e-05, "loss": 2.5873, "step": 272100 }, { "epoch": 6.97, "learning_rate": 2.6756498275096492e-05, "loss": 2.6361, "step": 272200 }, { "epoch": 6.98, "learning_rate": 2.6747959148819894e-05, "loss": 2.7453, "step": 272300 }, { "epoch": 6.98, "learning_rate": 2.6739420022543292e-05, "loss": 2.6073, "step": 272400 }, { "epoch": 6.98, "learning_rate": 2.6730880896266697e-05, "loss": 2.7198, "step": 272500 }, { "epoch": 6.98, "learning_rate": 2.6722341769990095e-05, "loss": 2.5774, "step": 272600 }, { "epoch": 6.99, "learning_rate": 2.6713802643713497e-05, "loss": 2.6256, "step": 272700 }, { "epoch": 6.99, "learning_rate": 2.6705263517436895e-05, "loss": 2.5441, "step": 272800 }, { "epoch": 6.99, "learning_rate": 2.66967243911603e-05, "loss": 2.5643, "step": 272900 }, { "epoch": 6.99, "learning_rate": 2.6688185264883698e-05, "loss": 2.8956, "step": 273000 }, { "epoch": 7.0, "learning_rate": 2.66796461386071e-05, "loss": 2.5068, "step": 273100 }, { "epoch": 7.0, "learning_rate": 2.6671107012330498e-05, "loss": 2.7411, "step": 273200 }, { "epoch": 7.0, "learning_rate": 2.6662567886053903e-05, "loss": 2.6969, "step": 273300 }, { "epoch": 7.0, "learning_rate": 2.66540287597773e-05, "loss": 2.4939, "step": 273400 }, { "epoch": 7.01, "learning_rate": 2.6645489633500702e-05, "loss": 2.5886, "step": 273500 }, { "epoch": 7.01, "learning_rate": 2.66369505072241e-05, "loss": 2.5141, "step": 273600 }, { "epoch": 7.01, "learning_rate": 2.6628411380947506e-05, "loss": 2.6091, "step": 273700 }, { "epoch": 7.01, "learning_rate": 2.6619872254670904e-05, "loss": 2.5974, "step": 273800 }, { "epoch": 7.02, "learning_rate": 2.6611333128394305e-05, "loss": 2.5129, "step": 273900 }, { "epoch": 7.02, "learning_rate": 2.6602794002117703e-05, "loss": 2.5836, "step": 274000 }, { "epoch": 7.02, "learning_rate": 2.659425487584111e-05, "loss": 2.5638, "step": 274100 }, { "epoch": 7.02, "learning_rate": 2.6585715749564506e-05, "loss": 2.5783, "step": 274200 }, { "epoch": 7.03, "learning_rate": 2.6577176623287908e-05, "loss": 2.6268, "step": 274300 }, { "epoch": 7.03, "learning_rate": 2.6568637497011306e-05, "loss": 2.6178, "step": 274400 }, { "epoch": 7.03, "learning_rate": 2.656009837073471e-05, "loss": 2.5375, "step": 274500 }, { "epoch": 7.03, "learning_rate": 2.6551559244458106e-05, "loss": 2.6121, "step": 274600 }, { "epoch": 7.04, "learning_rate": 2.654302011818151e-05, "loss": 2.5868, "step": 274700 }, { "epoch": 7.04, "learning_rate": 2.653448099190491e-05, "loss": 2.6258, "step": 274800 }, { "epoch": 7.04, "learning_rate": 2.652594186562831e-05, "loss": 2.6168, "step": 274900 }, { "epoch": 7.04, "learning_rate": 2.651740273935171e-05, "loss": 2.6585, "step": 275000 }, { "epoch": 7.05, "learning_rate": 2.6508863613075114e-05, "loss": 2.7558, "step": 275100 }, { "epoch": 7.05, "learning_rate": 2.6500324486798512e-05, "loss": 2.4509, "step": 275200 }, { "epoch": 7.05, "learning_rate": 2.6491785360521913e-05, "loss": 2.6579, "step": 275300 }, { "epoch": 7.06, "learning_rate": 2.648324623424531e-05, "loss": 2.5262, "step": 275400 }, { "epoch": 7.06, "learning_rate": 2.6474707107968716e-05, "loss": 2.5844, "step": 275500 }, { "epoch": 7.06, "learning_rate": 2.6466167981692115e-05, "loss": 2.4258, "step": 275600 }, { "epoch": 7.06, "learning_rate": 2.6457628855415516e-05, "loss": 2.6116, "step": 275700 }, { "epoch": 7.07, "learning_rate": 2.6449089729138914e-05, "loss": 2.4755, "step": 275800 }, { "epoch": 7.07, "learning_rate": 2.644055060286232e-05, "loss": 2.5765, "step": 275900 }, { "epoch": 7.07, "learning_rate": 2.6432011476585717e-05, "loss": 2.5403, "step": 276000 }, { "epoch": 7.07, "learning_rate": 2.642347235030912e-05, "loss": 2.6769, "step": 276100 }, { "epoch": 7.08, "learning_rate": 2.6414933224032517e-05, "loss": 2.4913, "step": 276200 }, { "epoch": 7.08, "learning_rate": 2.6406394097755922e-05, "loss": 2.6235, "step": 276300 }, { "epoch": 7.08, "learning_rate": 2.639785497147932e-05, "loss": 2.6402, "step": 276400 }, { "epoch": 7.08, "learning_rate": 2.6389315845202722e-05, "loss": 2.5187, "step": 276500 }, { "epoch": 7.09, "learning_rate": 2.638077671892612e-05, "loss": 2.7239, "step": 276600 }, { "epoch": 7.09, "learning_rate": 2.6372237592649525e-05, "loss": 2.5616, "step": 276700 }, { "epoch": 7.09, "learning_rate": 2.6363698466372923e-05, "loss": 2.5075, "step": 276800 }, { "epoch": 7.09, "learning_rate": 2.6355159340096325e-05, "loss": 2.6405, "step": 276900 }, { "epoch": 7.1, "learning_rate": 2.6346620213819723e-05, "loss": 2.7607, "step": 277000 }, { "epoch": 7.1, "learning_rate": 2.6338081087543128e-05, "loss": 2.6004, "step": 277100 }, { "epoch": 7.1, "learning_rate": 2.6329541961266522e-05, "loss": 2.3485, "step": 277200 }, { "epoch": 7.1, "learning_rate": 2.6321002834989927e-05, "loss": 2.6571, "step": 277300 }, { "epoch": 7.11, "learning_rate": 2.6312463708713326e-05, "loss": 2.6115, "step": 277400 }, { "epoch": 7.11, "learning_rate": 2.6303924582436727e-05, "loss": 2.6166, "step": 277500 }, { "epoch": 7.11, "learning_rate": 2.6295385456160125e-05, "loss": 2.6621, "step": 277600 }, { "epoch": 7.11, "learning_rate": 2.628684632988353e-05, "loss": 2.652, "step": 277700 }, { "epoch": 7.12, "learning_rate": 2.627830720360693e-05, "loss": 2.667, "step": 277800 }, { "epoch": 7.12, "learning_rate": 2.626976807733033e-05, "loss": 2.6392, "step": 277900 }, { "epoch": 7.12, "learning_rate": 2.6261228951053728e-05, "loss": 2.5462, "step": 278000 }, { "epoch": 7.12, "learning_rate": 2.6252689824777133e-05, "loss": 2.514, "step": 278100 }, { "epoch": 7.13, "learning_rate": 2.624415069850053e-05, "loss": 2.5877, "step": 278200 }, { "epoch": 7.13, "learning_rate": 2.6235611572223933e-05, "loss": 2.6484, "step": 278300 }, { "epoch": 7.13, "learning_rate": 2.622707244594733e-05, "loss": 2.772, "step": 278400 }, { "epoch": 7.13, "learning_rate": 2.6218533319670736e-05, "loss": 2.6233, "step": 278500 }, { "epoch": 7.14, "learning_rate": 2.6209994193394134e-05, "loss": 2.4345, "step": 278600 }, { "epoch": 7.14, "learning_rate": 2.6201455067117536e-05, "loss": 2.6611, "step": 278700 }, { "epoch": 7.14, "learning_rate": 2.6192915940840934e-05, "loss": 2.6447, "step": 278800 }, { "epoch": 7.14, "learning_rate": 2.618437681456434e-05, "loss": 2.5533, "step": 278900 }, { "epoch": 7.15, "learning_rate": 2.6175837688287737e-05, "loss": 2.5604, "step": 279000 }, { "epoch": 7.15, "learning_rate": 2.616729856201114e-05, "loss": 2.7492, "step": 279100 }, { "epoch": 7.15, "learning_rate": 2.6158759435734536e-05, "loss": 2.4866, "step": 279200 }, { "epoch": 7.15, "learning_rate": 2.615022030945794e-05, "loss": 2.6712, "step": 279300 }, { "epoch": 7.16, "learning_rate": 2.614168118318134e-05, "loss": 2.5109, "step": 279400 }, { "epoch": 7.16, "learning_rate": 2.613314205690474e-05, "loss": 2.5113, "step": 279500 }, { "epoch": 7.16, "learning_rate": 2.612460293062814e-05, "loss": 2.609, "step": 279600 }, { "epoch": 7.17, "learning_rate": 2.6116063804351544e-05, "loss": 2.5117, "step": 279700 }, { "epoch": 7.17, "learning_rate": 2.610752467807494e-05, "loss": 2.7068, "step": 279800 }, { "epoch": 7.17, "learning_rate": 2.6098985551798344e-05, "loss": 2.6591, "step": 279900 }, { "epoch": 7.17, "learning_rate": 2.6090446425521742e-05, "loss": 2.5596, "step": 280000 }, { "epoch": 7.18, "learning_rate": 2.6081907299245144e-05, "loss": 2.7538, "step": 280100 }, { "epoch": 7.18, "learning_rate": 2.6073368172968542e-05, "loss": 2.5003, "step": 280200 }, { "epoch": 7.18, "learning_rate": 2.6064829046691947e-05, "loss": 2.6085, "step": 280300 }, { "epoch": 7.18, "learning_rate": 2.6056289920415345e-05, "loss": 2.5711, "step": 280400 }, { "epoch": 7.19, "learning_rate": 2.6047750794138743e-05, "loss": 2.6625, "step": 280500 }, { "epoch": 7.19, "learning_rate": 2.6039211667862145e-05, "loss": 2.6042, "step": 280600 }, { "epoch": 7.19, "learning_rate": 2.6030672541585543e-05, "loss": 2.5667, "step": 280700 }, { "epoch": 7.19, "learning_rate": 2.6022133415308948e-05, "loss": 2.5048, "step": 280800 }, { "epoch": 7.2, "learning_rate": 2.6013594289032346e-05, "loss": 2.5722, "step": 280900 }, { "epoch": 7.2, "learning_rate": 2.6005055162755747e-05, "loss": 2.5667, "step": 281000 }, { "epoch": 7.2, "learning_rate": 2.5996516036479146e-05, "loss": 2.6249, "step": 281100 }, { "epoch": 7.2, "learning_rate": 2.598797691020255e-05, "loss": 2.4988, "step": 281200 }, { "epoch": 7.21, "learning_rate": 2.597943778392595e-05, "loss": 2.5008, "step": 281300 }, { "epoch": 7.21, "learning_rate": 2.597089865764935e-05, "loss": 2.7434, "step": 281400 }, { "epoch": 7.21, "learning_rate": 2.596235953137275e-05, "loss": 2.6359, "step": 281500 }, { "epoch": 7.21, "learning_rate": 2.5953820405096153e-05, "loss": 2.7356, "step": 281600 }, { "epoch": 7.22, "learning_rate": 2.594528127881955e-05, "loss": 2.3953, "step": 281700 }, { "epoch": 7.22, "learning_rate": 2.5936742152542953e-05, "loss": 2.4301, "step": 281800 }, { "epoch": 7.22, "learning_rate": 2.592820302626635e-05, "loss": 2.5427, "step": 281900 }, { "epoch": 7.22, "learning_rate": 2.5919663899989756e-05, "loss": 2.526, "step": 282000 }, { "epoch": 7.23, "learning_rate": 2.591112477371315e-05, "loss": 2.6178, "step": 282100 }, { "epoch": 7.23, "learning_rate": 2.5902585647436556e-05, "loss": 2.6561, "step": 282200 }, { "epoch": 7.23, "learning_rate": 2.5894046521159954e-05, "loss": 2.4964, "step": 282300 }, { "epoch": 7.23, "learning_rate": 2.5885507394883356e-05, "loss": 2.4569, "step": 282400 }, { "epoch": 7.24, "learning_rate": 2.5876968268606754e-05, "loss": 2.5758, "step": 282500 }, { "epoch": 7.24, "learning_rate": 2.586842914233016e-05, "loss": 2.5038, "step": 282600 }, { "epoch": 7.24, "learning_rate": 2.5859890016053557e-05, "loss": 2.5867, "step": 282700 }, { "epoch": 7.24, "learning_rate": 2.585135088977696e-05, "loss": 2.7712, "step": 282800 }, { "epoch": 7.25, "learning_rate": 2.5842811763500356e-05, "loss": 2.3986, "step": 282900 }, { "epoch": 7.25, "learning_rate": 2.583427263722376e-05, "loss": 2.6106, "step": 283000 }, { "epoch": 7.25, "learning_rate": 2.582573351094716e-05, "loss": 2.6787, "step": 283100 }, { "epoch": 7.25, "learning_rate": 2.581719438467056e-05, "loss": 2.586, "step": 283200 }, { "epoch": 7.26, "learning_rate": 2.580865525839396e-05, "loss": 2.4844, "step": 283300 }, { "epoch": 7.26, "learning_rate": 2.5800116132117364e-05, "loss": 2.7313, "step": 283400 }, { "epoch": 7.26, "learning_rate": 2.5791577005840762e-05, "loss": 2.6168, "step": 283500 }, { "epoch": 7.27, "learning_rate": 2.5783037879564164e-05, "loss": 2.4937, "step": 283600 }, { "epoch": 7.27, "learning_rate": 2.5774498753287562e-05, "loss": 2.7365, "step": 283700 }, { "epoch": 7.27, "learning_rate": 2.5765959627010967e-05, "loss": 2.626, "step": 283800 }, { "epoch": 7.27, "learning_rate": 2.5757420500734365e-05, "loss": 2.7652, "step": 283900 }, { "epoch": 7.28, "learning_rate": 2.5748881374457767e-05, "loss": 2.7086, "step": 284000 }, { "epoch": 7.28, "learning_rate": 2.5740342248181165e-05, "loss": 2.5696, "step": 284100 }, { "epoch": 7.28, "learning_rate": 2.573180312190457e-05, "loss": 2.6369, "step": 284200 }, { "epoch": 7.28, "learning_rate": 2.5723263995627968e-05, "loss": 2.6651, "step": 284300 }, { "epoch": 7.29, "learning_rate": 2.571472486935137e-05, "loss": 2.7067, "step": 284400 }, { "epoch": 7.29, "learning_rate": 2.5706185743074768e-05, "loss": 2.3506, "step": 284500 }, { "epoch": 7.29, "learning_rate": 2.5697646616798173e-05, "loss": 2.549, "step": 284600 }, { "epoch": 7.29, "learning_rate": 2.5689107490521567e-05, "loss": 2.7959, "step": 284700 }, { "epoch": 7.3, "learning_rate": 2.5680568364244972e-05, "loss": 2.7381, "step": 284800 }, { "epoch": 7.3, "learning_rate": 2.567202923796837e-05, "loss": 2.6734, "step": 284900 }, { "epoch": 7.3, "learning_rate": 2.5663490111691772e-05, "loss": 2.5239, "step": 285000 }, { "epoch": 7.3, "learning_rate": 2.565495098541517e-05, "loss": 2.5626, "step": 285100 }, { "epoch": 7.31, "learning_rate": 2.5646411859138575e-05, "loss": 2.4925, "step": 285200 }, { "epoch": 7.31, "learning_rate": 2.5637872732861973e-05, "loss": 2.6307, "step": 285300 }, { "epoch": 7.31, "learning_rate": 2.5629333606585375e-05, "loss": 2.4771, "step": 285400 }, { "epoch": 7.31, "learning_rate": 2.5620794480308773e-05, "loss": 2.5061, "step": 285500 }, { "epoch": 7.32, "learning_rate": 2.5612255354032178e-05, "loss": 2.5368, "step": 285600 }, { "epoch": 7.32, "learning_rate": 2.5603716227755576e-05, "loss": 2.7254, "step": 285700 }, { "epoch": 7.32, "learning_rate": 2.5595177101478978e-05, "loss": 2.6842, "step": 285800 }, { "epoch": 7.32, "learning_rate": 2.5586637975202376e-05, "loss": 2.7254, "step": 285900 }, { "epoch": 7.33, "learning_rate": 2.557809884892578e-05, "loss": 2.6652, "step": 286000 }, { "epoch": 7.33, "learning_rate": 2.556955972264918e-05, "loss": 2.473, "step": 286100 }, { "epoch": 7.33, "learning_rate": 2.556102059637258e-05, "loss": 2.6769, "step": 286200 }, { "epoch": 7.33, "learning_rate": 2.555248147009598e-05, "loss": 2.6187, "step": 286300 }, { "epoch": 7.34, "learning_rate": 2.5543942343819384e-05, "loss": 2.6971, "step": 286400 }, { "epoch": 7.34, "learning_rate": 2.5535403217542782e-05, "loss": 2.5785, "step": 286500 }, { "epoch": 7.34, "learning_rate": 2.5526864091266183e-05, "loss": 2.6179, "step": 286600 }, { "epoch": 7.34, "learning_rate": 2.551832496498958e-05, "loss": 2.4922, "step": 286700 }, { "epoch": 7.35, "learning_rate": 2.5509785838712986e-05, "loss": 2.623, "step": 286800 }, { "epoch": 7.35, "learning_rate": 2.5501246712436385e-05, "loss": 2.6506, "step": 286900 }, { "epoch": 7.35, "learning_rate": 2.5492707586159786e-05, "loss": 2.4052, "step": 287000 }, { "epoch": 7.35, "learning_rate": 2.5484168459883184e-05, "loss": 2.5711, "step": 287100 }, { "epoch": 7.36, "learning_rate": 2.547562933360659e-05, "loss": 2.7055, "step": 287200 }, { "epoch": 7.36, "learning_rate": 2.5467090207329984e-05, "loss": 2.4519, "step": 287300 }, { "epoch": 7.36, "learning_rate": 2.545855108105339e-05, "loss": 2.6229, "step": 287400 }, { "epoch": 7.36, "learning_rate": 2.5450011954776787e-05, "loss": 2.6368, "step": 287500 }, { "epoch": 7.37, "learning_rate": 2.544147282850019e-05, "loss": 2.7205, "step": 287600 }, { "epoch": 7.37, "learning_rate": 2.5432933702223587e-05, "loss": 2.6398, "step": 287700 }, { "epoch": 7.37, "learning_rate": 2.542439457594699e-05, "loss": 2.5763, "step": 287800 }, { "epoch": 7.38, "learning_rate": 2.541585544967039e-05, "loss": 2.5818, "step": 287900 }, { "epoch": 7.38, "learning_rate": 2.540731632339379e-05, "loss": 2.5528, "step": 288000 }, { "epoch": 7.38, "learning_rate": 2.539877719711719e-05, "loss": 2.5105, "step": 288100 }, { "epoch": 7.38, "learning_rate": 2.5390238070840594e-05, "loss": 2.5778, "step": 288200 }, { "epoch": 7.39, "learning_rate": 2.5381698944563993e-05, "loss": 2.6856, "step": 288300 }, { "epoch": 7.39, "learning_rate": 2.5373159818287394e-05, "loss": 2.6557, "step": 288400 }, { "epoch": 7.39, "learning_rate": 2.5364620692010792e-05, "loss": 2.5503, "step": 288500 }, { "epoch": 7.39, "learning_rate": 2.5356081565734197e-05, "loss": 2.5558, "step": 288600 }, { "epoch": 7.4, "learning_rate": 2.5347542439457595e-05, "loss": 2.6848, "step": 288700 }, { "epoch": 7.4, "learning_rate": 2.5339003313180997e-05, "loss": 2.5382, "step": 288800 }, { "epoch": 7.4, "learning_rate": 2.5330464186904395e-05, "loss": 2.6446, "step": 288900 }, { "epoch": 7.4, "learning_rate": 2.53219250606278e-05, "loss": 2.7562, "step": 289000 }, { "epoch": 7.41, "learning_rate": 2.5313385934351198e-05, "loss": 2.4963, "step": 289100 }, { "epoch": 7.41, "learning_rate": 2.53048468080746e-05, "loss": 2.5525, "step": 289200 }, { "epoch": 7.41, "learning_rate": 2.5296307681797998e-05, "loss": 2.6787, "step": 289300 }, { "epoch": 7.41, "learning_rate": 2.5287768555521403e-05, "loss": 2.6217, "step": 289400 }, { "epoch": 7.42, "learning_rate": 2.52792294292448e-05, "loss": 2.3872, "step": 289500 }, { "epoch": 7.42, "learning_rate": 2.5270690302968203e-05, "loss": 2.7578, "step": 289600 }, { "epoch": 7.42, "learning_rate": 2.52621511766916e-05, "loss": 2.5643, "step": 289700 }, { "epoch": 7.42, "learning_rate": 2.5253612050415006e-05, "loss": 2.4704, "step": 289800 }, { "epoch": 7.43, "learning_rate": 2.52450729241384e-05, "loss": 2.5876, "step": 289900 }, { "epoch": 7.43, "learning_rate": 2.5236533797861805e-05, "loss": 2.6602, "step": 290000 }, { "epoch": 7.43, "learning_rate": 2.5227994671585204e-05, "loss": 2.4969, "step": 290100 }, { "epoch": 7.43, "learning_rate": 2.5219455545308605e-05, "loss": 2.6634, "step": 290200 }, { "epoch": 7.44, "learning_rate": 2.5210916419032003e-05, "loss": 2.5549, "step": 290300 }, { "epoch": 7.44, "learning_rate": 2.5202377292755408e-05, "loss": 2.5112, "step": 290400 }, { "epoch": 7.44, "learning_rate": 2.5193838166478806e-05, "loss": 2.4683, "step": 290500 }, { "epoch": 7.44, "learning_rate": 2.5185299040202208e-05, "loss": 2.605, "step": 290600 }, { "epoch": 7.45, "learning_rate": 2.5176759913925606e-05, "loss": 2.5804, "step": 290700 }, { "epoch": 7.45, "learning_rate": 2.516822078764901e-05, "loss": 2.476, "step": 290800 }, { "epoch": 7.45, "learning_rate": 2.515968166137241e-05, "loss": 2.672, "step": 290900 }, { "epoch": 7.45, "learning_rate": 2.515114253509581e-05, "loss": 2.7042, "step": 291000 }, { "epoch": 7.46, "learning_rate": 2.514260340881921e-05, "loss": 2.4737, "step": 291100 }, { "epoch": 7.46, "learning_rate": 2.5134064282542614e-05, "loss": 2.6669, "step": 291200 }, { "epoch": 7.46, "learning_rate": 2.5125525156266012e-05, "loss": 2.5015, "step": 291300 }, { "epoch": 7.46, "learning_rate": 2.5116986029989414e-05, "loss": 2.5606, "step": 291400 }, { "epoch": 7.47, "learning_rate": 2.5108446903712812e-05, "loss": 2.5226, "step": 291500 }, { "epoch": 7.47, "learning_rate": 2.5099907777436217e-05, "loss": 2.6841, "step": 291600 }, { "epoch": 7.47, "learning_rate": 2.5091368651159615e-05, "loss": 2.5918, "step": 291700 }, { "epoch": 7.48, "learning_rate": 2.5082829524883016e-05, "loss": 2.6181, "step": 291800 }, { "epoch": 7.48, "learning_rate": 2.5074290398606415e-05, "loss": 2.5589, "step": 291900 }, { "epoch": 7.48, "learning_rate": 2.506575127232982e-05, "loss": 2.5423, "step": 292000 }, { "epoch": 7.48, "learning_rate": 2.5057212146053218e-05, "loss": 2.7337, "step": 292100 }, { "epoch": 7.49, "learning_rate": 2.504867301977662e-05, "loss": 2.6391, "step": 292200 }, { "epoch": 7.49, "learning_rate": 2.5040133893500017e-05, "loss": 2.3304, "step": 292300 }, { "epoch": 7.49, "learning_rate": 2.5031594767223422e-05, "loss": 2.4947, "step": 292400 }, { "epoch": 7.49, "learning_rate": 2.5023055640946817e-05, "loss": 2.49, "step": 292500 }, { "epoch": 7.5, "learning_rate": 2.5014516514670222e-05, "loss": 2.5896, "step": 292600 }, { "epoch": 7.5, "learning_rate": 2.500597738839362e-05, "loss": 2.4626, "step": 292700 }, { "epoch": 7.5, "learning_rate": 2.499743826211702e-05, "loss": 2.6996, "step": 292800 }, { "epoch": 7.5, "learning_rate": 2.498889913584042e-05, "loss": 2.556, "step": 292900 }, { "epoch": 7.51, "learning_rate": 2.498036000956382e-05, "loss": 2.7386, "step": 293000 }, { "epoch": 7.51, "learning_rate": 2.4971820883287223e-05, "loss": 2.5582, "step": 293100 }, { "epoch": 7.51, "learning_rate": 2.4963281757010624e-05, "loss": 2.5779, "step": 293200 }, { "epoch": 7.51, "learning_rate": 2.4954742630734023e-05, "loss": 2.5479, "step": 293300 }, { "epoch": 7.52, "learning_rate": 2.4946203504457424e-05, "loss": 2.6817, "step": 293400 }, { "epoch": 7.52, "learning_rate": 2.4937664378180826e-05, "loss": 2.5313, "step": 293500 }, { "epoch": 7.52, "learning_rate": 2.4929125251904227e-05, "loss": 2.7259, "step": 293600 }, { "epoch": 7.52, "learning_rate": 2.4920586125627625e-05, "loss": 2.674, "step": 293700 }, { "epoch": 7.53, "learning_rate": 2.4912046999351027e-05, "loss": 2.3784, "step": 293800 }, { "epoch": 7.53, "learning_rate": 2.490350787307443e-05, "loss": 2.4276, "step": 293900 }, { "epoch": 7.53, "learning_rate": 2.4894968746797827e-05, "loss": 2.6637, "step": 294000 }, { "epoch": 7.53, "learning_rate": 2.4886429620521228e-05, "loss": 2.4605, "step": 294100 }, { "epoch": 7.54, "learning_rate": 2.487789049424463e-05, "loss": 2.5052, "step": 294200 }, { "epoch": 7.54, "learning_rate": 2.486935136796803e-05, "loss": 2.6614, "step": 294300 }, { "epoch": 7.54, "learning_rate": 2.486081224169143e-05, "loss": 2.5257, "step": 294400 }, { "epoch": 7.54, "learning_rate": 2.485227311541483e-05, "loss": 2.5516, "step": 294500 }, { "epoch": 7.55, "learning_rate": 2.4843733989138233e-05, "loss": 2.7613, "step": 294600 }, { "epoch": 7.55, "learning_rate": 2.4835194862861634e-05, "loss": 2.5585, "step": 294700 }, { "epoch": 7.55, "learning_rate": 2.4826655736585032e-05, "loss": 2.5098, "step": 294800 }, { "epoch": 7.55, "learning_rate": 2.4818116610308434e-05, "loss": 2.607, "step": 294900 }, { "epoch": 7.56, "learning_rate": 2.4809577484031835e-05, "loss": 2.634, "step": 295000 }, { "epoch": 7.56, "learning_rate": 2.4801038357755234e-05, "loss": 2.443, "step": 295100 }, { "epoch": 7.56, "learning_rate": 2.4792499231478635e-05, "loss": 2.5236, "step": 295200 }, { "epoch": 7.56, "learning_rate": 2.4783960105202037e-05, "loss": 2.6182, "step": 295300 }, { "epoch": 7.57, "learning_rate": 2.4775420978925438e-05, "loss": 2.5835, "step": 295400 }, { "epoch": 7.57, "learning_rate": 2.4766881852648836e-05, "loss": 2.8453, "step": 295500 }, { "epoch": 7.57, "learning_rate": 2.4758342726372238e-05, "loss": 2.6775, "step": 295600 }, { "epoch": 7.58, "learning_rate": 2.474980360009564e-05, "loss": 2.7386, "step": 295700 }, { "epoch": 7.58, "learning_rate": 2.474126447381904e-05, "loss": 2.56, "step": 295800 }, { "epoch": 7.58, "learning_rate": 2.473272534754244e-05, "loss": 2.703, "step": 295900 }, { "epoch": 7.58, "learning_rate": 2.472418622126584e-05, "loss": 2.6336, "step": 296000 }, { "epoch": 7.59, "learning_rate": 2.4715647094989242e-05, "loss": 2.8031, "step": 296100 }, { "epoch": 7.59, "learning_rate": 2.4707107968712644e-05, "loss": 2.5844, "step": 296200 }, { "epoch": 7.59, "learning_rate": 2.4698568842436042e-05, "loss": 2.4344, "step": 296300 }, { "epoch": 7.59, "learning_rate": 2.4690029716159444e-05, "loss": 2.5045, "step": 296400 }, { "epoch": 7.6, "learning_rate": 2.4681490589882845e-05, "loss": 2.6255, "step": 296500 }, { "epoch": 7.6, "learning_rate": 2.4672951463606243e-05, "loss": 2.4738, "step": 296600 }, { "epoch": 7.6, "learning_rate": 2.4664412337329645e-05, "loss": 2.8838, "step": 296700 }, { "epoch": 7.6, "learning_rate": 2.4655873211053046e-05, "loss": 2.5313, "step": 296800 }, { "epoch": 7.61, "learning_rate": 2.4647334084776448e-05, "loss": 2.4704, "step": 296900 }, { "epoch": 7.61, "learning_rate": 2.4638794958499846e-05, "loss": 2.7296, "step": 297000 }, { "epoch": 7.61, "learning_rate": 2.4630255832223248e-05, "loss": 2.4954, "step": 297100 }, { "epoch": 7.61, "learning_rate": 2.462171670594665e-05, "loss": 2.4362, "step": 297200 }, { "epoch": 7.62, "learning_rate": 2.461317757967005e-05, "loss": 2.6174, "step": 297300 }, { "epoch": 7.62, "learning_rate": 2.460463845339345e-05, "loss": 2.7054, "step": 297400 }, { "epoch": 7.62, "learning_rate": 2.459609932711685e-05, "loss": 2.6661, "step": 297500 }, { "epoch": 7.62, "learning_rate": 2.4587560200840252e-05, "loss": 2.6271, "step": 297600 }, { "epoch": 7.63, "learning_rate": 2.457902107456365e-05, "loss": 2.6175, "step": 297700 }, { "epoch": 7.63, "learning_rate": 2.457048194828705e-05, "loss": 2.6022, "step": 297800 }, { "epoch": 7.63, "learning_rate": 2.4561942822010453e-05, "loss": 2.4242, "step": 297900 }, { "epoch": 7.63, "learning_rate": 2.4553403695733855e-05, "loss": 2.5952, "step": 298000 }, { "epoch": 7.64, "learning_rate": 2.4544864569457253e-05, "loss": 2.5788, "step": 298100 }, { "epoch": 7.64, "learning_rate": 2.4536325443180654e-05, "loss": 2.6518, "step": 298200 }, { "epoch": 7.64, "learning_rate": 2.4527786316904056e-05, "loss": 2.5203, "step": 298300 }, { "epoch": 7.64, "learning_rate": 2.4519247190627458e-05, "loss": 2.605, "step": 298400 }, { "epoch": 7.65, "learning_rate": 2.4510708064350856e-05, "loss": 2.6588, "step": 298500 }, { "epoch": 7.65, "learning_rate": 2.4502168938074257e-05, "loss": 2.5363, "step": 298600 }, { "epoch": 7.65, "learning_rate": 2.449362981179766e-05, "loss": 2.4543, "step": 298700 }, { "epoch": 7.65, "learning_rate": 2.448509068552106e-05, "loss": 2.7616, "step": 298800 }, { "epoch": 7.66, "learning_rate": 2.447655155924446e-05, "loss": 2.6632, "step": 298900 }, { "epoch": 7.66, "learning_rate": 2.446801243296786e-05, "loss": 2.5355, "step": 299000 }, { "epoch": 7.66, "learning_rate": 2.445947330669126e-05, "loss": 2.8136, "step": 299100 }, { "epoch": 7.66, "learning_rate": 2.445093418041466e-05, "loss": 2.7673, "step": 299200 }, { "epoch": 7.67, "learning_rate": 2.444239505413806e-05, "loss": 2.6112, "step": 299300 }, { "epoch": 7.67, "learning_rate": 2.4433855927861463e-05, "loss": 2.616, "step": 299400 }, { "epoch": 7.67, "learning_rate": 2.4425316801584864e-05, "loss": 2.6405, "step": 299500 }, { "epoch": 7.67, "learning_rate": 2.4416777675308263e-05, "loss": 2.6973, "step": 299600 }, { "epoch": 7.68, "learning_rate": 2.4408238549031664e-05, "loss": 2.7004, "step": 299700 }, { "epoch": 7.68, "learning_rate": 2.4399699422755066e-05, "loss": 2.5356, "step": 299800 }, { "epoch": 7.68, "learning_rate": 2.4391160296478467e-05, "loss": 2.6527, "step": 299900 }, { "epoch": 7.69, "learning_rate": 2.4382621170201865e-05, "loss": 2.4687, "step": 300000 }, { "epoch": 7.69, "learning_rate": 2.4374082043925267e-05, "loss": 2.3935, "step": 300100 }, { "epoch": 7.69, "learning_rate": 2.436554291764867e-05, "loss": 2.6342, "step": 300200 }, { "epoch": 7.69, "learning_rate": 2.435700379137207e-05, "loss": 2.682, "step": 300300 }, { "epoch": 7.7, "learning_rate": 2.4348464665095468e-05, "loss": 2.4398, "step": 300400 }, { "epoch": 7.7, "learning_rate": 2.433992553881887e-05, "loss": 2.7451, "step": 300500 }, { "epoch": 7.7, "learning_rate": 2.433138641254227e-05, "loss": 2.4977, "step": 300600 }, { "epoch": 7.7, "learning_rate": 2.432284728626567e-05, "loss": 2.4256, "step": 300700 }, { "epoch": 7.71, "learning_rate": 2.431430815998907e-05, "loss": 2.593, "step": 300800 }, { "epoch": 7.71, "learning_rate": 2.4305769033712473e-05, "loss": 2.3915, "step": 300900 }, { "epoch": 7.71, "learning_rate": 2.4297229907435874e-05, "loss": 2.5495, "step": 301000 }, { "epoch": 7.71, "learning_rate": 2.4288690781159272e-05, "loss": 2.8005, "step": 301100 }, { "epoch": 7.72, "learning_rate": 2.4280151654882674e-05, "loss": 2.5199, "step": 301200 }, { "epoch": 7.72, "learning_rate": 2.4271612528606075e-05, "loss": 2.6356, "step": 301300 }, { "epoch": 7.72, "learning_rate": 2.4263073402329477e-05, "loss": 2.6186, "step": 301400 }, { "epoch": 7.72, "learning_rate": 2.4254534276052875e-05, "loss": 2.3638, "step": 301500 }, { "epoch": 7.73, "learning_rate": 2.4245995149776277e-05, "loss": 2.4584, "step": 301600 }, { "epoch": 7.73, "learning_rate": 2.4237456023499678e-05, "loss": 2.4968, "step": 301700 }, { "epoch": 7.73, "learning_rate": 2.4228916897223076e-05, "loss": 2.4364, "step": 301800 }, { "epoch": 7.73, "learning_rate": 2.4220377770946478e-05, "loss": 2.4099, "step": 301900 }, { "epoch": 7.74, "learning_rate": 2.421183864466988e-05, "loss": 2.5179, "step": 302000 }, { "epoch": 7.74, "learning_rate": 2.420329951839328e-05, "loss": 2.4713, "step": 302100 }, { "epoch": 7.74, "learning_rate": 2.419476039211668e-05, "loss": 2.6405, "step": 302200 }, { "epoch": 7.74, "learning_rate": 2.418622126584008e-05, "loss": 2.4238, "step": 302300 }, { "epoch": 7.75, "learning_rate": 2.4177682139563482e-05, "loss": 2.4564, "step": 302400 }, { "epoch": 7.75, "learning_rate": 2.4169143013286884e-05, "loss": 2.554, "step": 302500 }, { "epoch": 7.75, "learning_rate": 2.4160603887010282e-05, "loss": 2.438, "step": 302600 }, { "epoch": 7.75, "learning_rate": 2.4152064760733683e-05, "loss": 2.4269, "step": 302700 }, { "epoch": 7.76, "learning_rate": 2.4143525634457085e-05, "loss": 2.4377, "step": 302800 }, { "epoch": 7.76, "learning_rate": 2.4134986508180487e-05, "loss": 2.5265, "step": 302900 }, { "epoch": 7.76, "learning_rate": 2.4126447381903885e-05, "loss": 2.639, "step": 303000 }, { "epoch": 7.76, "learning_rate": 2.4117908255627286e-05, "loss": 2.4689, "step": 303100 }, { "epoch": 7.77, "learning_rate": 2.4109369129350688e-05, "loss": 2.601, "step": 303200 }, { "epoch": 7.77, "learning_rate": 2.4100830003074086e-05, "loss": 2.5779, "step": 303300 }, { "epoch": 7.77, "learning_rate": 2.4092290876797488e-05, "loss": 2.6439, "step": 303400 }, { "epoch": 7.77, "learning_rate": 2.408375175052089e-05, "loss": 2.6182, "step": 303500 }, { "epoch": 7.78, "learning_rate": 2.407521262424429e-05, "loss": 2.6911, "step": 303600 }, { "epoch": 7.78, "learning_rate": 2.406667349796769e-05, "loss": 2.453, "step": 303700 }, { "epoch": 7.78, "learning_rate": 2.405813437169109e-05, "loss": 2.5643, "step": 303800 }, { "epoch": 7.79, "learning_rate": 2.4049595245414492e-05, "loss": 2.6153, "step": 303900 }, { "epoch": 7.79, "learning_rate": 2.4041056119137893e-05, "loss": 2.5002, "step": 304000 }, { "epoch": 7.79, "learning_rate": 2.403251699286129e-05, "loss": 2.5769, "step": 304100 }, { "epoch": 7.79, "learning_rate": 2.4023977866584693e-05, "loss": 2.4474, "step": 304200 }, { "epoch": 7.8, "learning_rate": 2.4015438740308095e-05, "loss": 2.5044, "step": 304300 }, { "epoch": 7.8, "learning_rate": 2.4006899614031493e-05, "loss": 2.5741, "step": 304400 }, { "epoch": 7.8, "learning_rate": 2.3998360487754894e-05, "loss": 2.612, "step": 304500 }, { "epoch": 7.8, "learning_rate": 2.3989821361478296e-05, "loss": 2.3832, "step": 304600 }, { "epoch": 7.81, "learning_rate": 2.3981282235201697e-05, "loss": 2.5406, "step": 304700 }, { "epoch": 7.81, "learning_rate": 2.3972743108925096e-05, "loss": 2.5468, "step": 304800 }, { "epoch": 7.81, "learning_rate": 2.3964203982648497e-05, "loss": 2.4277, "step": 304900 }, { "epoch": 7.81, "learning_rate": 2.39556648563719e-05, "loss": 2.4712, "step": 305000 }, { "epoch": 7.82, "learning_rate": 2.39471257300953e-05, "loss": 2.5574, "step": 305100 }, { "epoch": 7.82, "learning_rate": 2.39385866038187e-05, "loss": 2.6474, "step": 305200 }, { "epoch": 7.82, "learning_rate": 2.39300474775421e-05, "loss": 2.8032, "step": 305300 }, { "epoch": 7.82, "learning_rate": 2.39215083512655e-05, "loss": 2.5209, "step": 305400 }, { "epoch": 7.83, "learning_rate": 2.3912969224988903e-05, "loss": 2.5471, "step": 305500 }, { "epoch": 7.83, "learning_rate": 2.39044300987123e-05, "loss": 2.5731, "step": 305600 }, { "epoch": 7.83, "learning_rate": 2.3895890972435703e-05, "loss": 2.5844, "step": 305700 }, { "epoch": 7.83, "learning_rate": 2.3887351846159104e-05, "loss": 2.4313, "step": 305800 }, { "epoch": 7.84, "learning_rate": 2.3878812719882503e-05, "loss": 2.5094, "step": 305900 }, { "epoch": 7.84, "learning_rate": 2.3870273593605904e-05, "loss": 2.539, "step": 306000 }, { "epoch": 7.84, "learning_rate": 2.3861734467329306e-05, "loss": 2.5007, "step": 306100 }, { "epoch": 7.84, "learning_rate": 2.3853195341052707e-05, "loss": 2.4104, "step": 306200 }, { "epoch": 7.85, "learning_rate": 2.3844656214776105e-05, "loss": 2.6048, "step": 306300 }, { "epoch": 7.85, "learning_rate": 2.3836117088499507e-05, "loss": 2.7752, "step": 306400 }, { "epoch": 7.85, "learning_rate": 2.382757796222291e-05, "loss": 2.6126, "step": 306500 }, { "epoch": 7.85, "learning_rate": 2.381903883594631e-05, "loss": 2.4156, "step": 306600 }, { "epoch": 7.86, "learning_rate": 2.3810499709669708e-05, "loss": 2.5983, "step": 306700 }, { "epoch": 7.86, "learning_rate": 2.3801960583393106e-05, "loss": 2.4602, "step": 306800 }, { "epoch": 7.86, "learning_rate": 2.3793421457116508e-05, "loss": 2.6034, "step": 306900 }, { "epoch": 7.86, "learning_rate": 2.378488233083991e-05, "loss": 2.5708, "step": 307000 }, { "epoch": 7.87, "learning_rate": 2.3776343204563308e-05, "loss": 2.4888, "step": 307100 }, { "epoch": 7.87, "learning_rate": 2.376780407828671e-05, "loss": 2.547, "step": 307200 }, { "epoch": 7.87, "learning_rate": 2.375926495201011e-05, "loss": 2.5943, "step": 307300 }, { "epoch": 7.87, "learning_rate": 2.3750725825733512e-05, "loss": 2.5553, "step": 307400 }, { "epoch": 7.88, "learning_rate": 2.374218669945691e-05, "loss": 2.5765, "step": 307500 }, { "epoch": 7.88, "learning_rate": 2.3733647573180312e-05, "loss": 2.5925, "step": 307600 }, { "epoch": 7.88, "learning_rate": 2.3725108446903713e-05, "loss": 2.4431, "step": 307700 }, { "epoch": 7.89, "learning_rate": 2.3716569320627115e-05, "loss": 2.6363, "step": 307800 }, { "epoch": 7.89, "learning_rate": 2.3708030194350513e-05, "loss": 2.6017, "step": 307900 }, { "epoch": 7.89, "learning_rate": 2.3699491068073915e-05, "loss": 2.4631, "step": 308000 }, { "epoch": 7.89, "learning_rate": 2.3690951941797316e-05, "loss": 2.4872, "step": 308100 }, { "epoch": 7.9, "learning_rate": 2.3682412815520714e-05, "loss": 2.5542, "step": 308200 }, { "epoch": 7.9, "learning_rate": 2.3673873689244116e-05, "loss": 2.5386, "step": 308300 }, { "epoch": 7.9, "learning_rate": 2.3665334562967517e-05, "loss": 2.363, "step": 308400 }, { "epoch": 7.9, "learning_rate": 2.365679543669092e-05, "loss": 2.5627, "step": 308500 }, { "epoch": 7.91, "learning_rate": 2.3648256310414317e-05, "loss": 2.5397, "step": 308600 }, { "epoch": 7.91, "learning_rate": 2.363971718413772e-05, "loss": 2.571, "step": 308700 }, { "epoch": 7.91, "learning_rate": 2.363117805786112e-05, "loss": 2.5536, "step": 308800 }, { "epoch": 7.91, "learning_rate": 2.3622638931584522e-05, "loss": 2.4963, "step": 308900 }, { "epoch": 7.92, "learning_rate": 2.361409980530792e-05, "loss": 2.538, "step": 309000 }, { "epoch": 7.92, "learning_rate": 2.360556067903132e-05, "loss": 2.5102, "step": 309100 }, { "epoch": 7.92, "learning_rate": 2.3597021552754723e-05, "loss": 2.3795, "step": 309200 }, { "epoch": 7.92, "learning_rate": 2.358848242647812e-05, "loss": 2.6052, "step": 309300 }, { "epoch": 7.93, "learning_rate": 2.3579943300201523e-05, "loss": 2.6662, "step": 309400 }, { "epoch": 7.93, "learning_rate": 2.3571404173924924e-05, "loss": 2.6022, "step": 309500 }, { "epoch": 7.93, "learning_rate": 2.3562865047648326e-05, "loss": 2.5958, "step": 309600 }, { "epoch": 7.93, "learning_rate": 2.3554325921371724e-05, "loss": 2.6017, "step": 309700 }, { "epoch": 7.94, "learning_rate": 2.3545786795095126e-05, "loss": 2.5168, "step": 309800 }, { "epoch": 7.94, "learning_rate": 2.3537247668818527e-05, "loss": 2.3941, "step": 309900 }, { "epoch": 7.94, "learning_rate": 2.352870854254193e-05, "loss": 2.5736, "step": 310000 }, { "epoch": 7.94, "learning_rate": 2.3520169416265327e-05, "loss": 2.6677, "step": 310100 }, { "epoch": 7.95, "learning_rate": 2.351163028998873e-05, "loss": 2.5065, "step": 310200 }, { "epoch": 7.95, "learning_rate": 2.350309116371213e-05, "loss": 2.6352, "step": 310300 }, { "epoch": 7.95, "learning_rate": 2.349455203743553e-05, "loss": 2.5603, "step": 310400 }, { "epoch": 7.95, "learning_rate": 2.348601291115893e-05, "loss": 2.6301, "step": 310500 }, { "epoch": 7.96, "learning_rate": 2.347747378488233e-05, "loss": 2.6784, "step": 310600 }, { "epoch": 7.96, "learning_rate": 2.3468934658605733e-05, "loss": 2.4355, "step": 310700 }, { "epoch": 7.96, "learning_rate": 2.346039553232913e-05, "loss": 2.6839, "step": 310800 }, { "epoch": 7.96, "learning_rate": 2.3451856406052532e-05, "loss": 2.665, "step": 310900 }, { "epoch": 7.97, "learning_rate": 2.3443317279775934e-05, "loss": 2.5209, "step": 311000 }, { "epoch": 7.97, "learning_rate": 2.3434778153499336e-05, "loss": 2.4111, "step": 311100 }, { "epoch": 7.97, "learning_rate": 2.3426239027222734e-05, "loss": 2.6543, "step": 311200 }, { "epoch": 7.97, "learning_rate": 2.3417699900946135e-05, "loss": 2.5009, "step": 311300 }, { "epoch": 7.98, "learning_rate": 2.3409160774669537e-05, "loss": 2.4491, "step": 311400 }, { "epoch": 7.98, "learning_rate": 2.340062164839294e-05, "loss": 2.6839, "step": 311500 }, { "epoch": 7.98, "learning_rate": 2.3392082522116337e-05, "loss": 2.4937, "step": 311600 }, { "epoch": 7.98, "learning_rate": 2.3383543395839738e-05, "loss": 2.6359, "step": 311700 }, { "epoch": 7.99, "learning_rate": 2.337500426956314e-05, "loss": 2.591, "step": 311800 }, { "epoch": 7.99, "learning_rate": 2.3366465143286538e-05, "loss": 2.5896, "step": 311900 }, { "epoch": 7.99, "learning_rate": 2.335792601700994e-05, "loss": 2.4467, "step": 312000 }, { "epoch": 8.0, "learning_rate": 2.334938689073334e-05, "loss": 2.3812, "step": 312100 }, { "epoch": 8.0, "learning_rate": 2.3340847764456742e-05, "loss": 2.3776, "step": 312200 }, { "epoch": 8.0, "learning_rate": 2.333230863818014e-05, "loss": 2.6316, "step": 312300 }, { "epoch": 8.0, "learning_rate": 2.3323769511903542e-05, "loss": 2.5576, "step": 312400 }, { "epoch": 8.01, "learning_rate": 2.3315230385626944e-05, "loss": 2.5214, "step": 312500 }, { "epoch": 8.01, "learning_rate": 2.3306691259350345e-05, "loss": 2.5585, "step": 312600 }, { "epoch": 8.01, "learning_rate": 2.3298152133073743e-05, "loss": 2.5395, "step": 312700 }, { "epoch": 8.01, "learning_rate": 2.3289613006797145e-05, "loss": 2.4142, "step": 312800 }, { "epoch": 8.02, "learning_rate": 2.3281073880520547e-05, "loss": 2.576, "step": 312900 }, { "epoch": 8.02, "learning_rate": 2.3272534754243948e-05, "loss": 2.5555, "step": 313000 }, { "epoch": 8.02, "learning_rate": 2.3263995627967346e-05, "loss": 2.5797, "step": 313100 }, { "epoch": 8.02, "learning_rate": 2.3255456501690748e-05, "loss": 2.5472, "step": 313200 }, { "epoch": 8.03, "learning_rate": 2.324691737541415e-05, "loss": 2.4686, "step": 313300 }, { "epoch": 8.03, "learning_rate": 2.3238378249137547e-05, "loss": 2.5522, "step": 313400 }, { "epoch": 8.03, "learning_rate": 2.322983912286095e-05, "loss": 2.4373, "step": 313500 }, { "epoch": 8.03, "learning_rate": 2.322129999658435e-05, "loss": 2.487, "step": 313600 }, { "epoch": 8.04, "learning_rate": 2.3212760870307752e-05, "loss": 2.3727, "step": 313700 }, { "epoch": 8.04, "learning_rate": 2.320422174403115e-05, "loss": 2.6973, "step": 313800 }, { "epoch": 8.04, "learning_rate": 2.3195682617754552e-05, "loss": 2.3589, "step": 313900 }, { "epoch": 8.04, "learning_rate": 2.3187143491477953e-05, "loss": 2.6867, "step": 314000 }, { "epoch": 8.05, "learning_rate": 2.3178604365201355e-05, "loss": 2.6313, "step": 314100 }, { "epoch": 8.05, "learning_rate": 2.3170065238924753e-05, "loss": 2.4842, "step": 314200 }, { "epoch": 8.05, "learning_rate": 2.3161526112648155e-05, "loss": 2.5929, "step": 314300 }, { "epoch": 8.05, "learning_rate": 2.3152986986371556e-05, "loss": 2.4353, "step": 314400 }, { "epoch": 8.06, "learning_rate": 2.3144447860094954e-05, "loss": 2.5274, "step": 314500 }, { "epoch": 8.06, "learning_rate": 2.3135908733818356e-05, "loss": 2.3902, "step": 314600 }, { "epoch": 8.06, "learning_rate": 2.3127369607541757e-05, "loss": 2.5433, "step": 314700 }, { "epoch": 8.06, "learning_rate": 2.311883048126516e-05, "loss": 2.7213, "step": 314800 }, { "epoch": 8.07, "learning_rate": 2.3110291354988557e-05, "loss": 2.7245, "step": 314900 }, { "epoch": 8.07, "learning_rate": 2.310175222871196e-05, "loss": 2.6024, "step": 315000 }, { "epoch": 8.07, "learning_rate": 2.309321310243536e-05, "loss": 2.5649, "step": 315100 }, { "epoch": 8.07, "learning_rate": 2.3084673976158762e-05, "loss": 2.5184, "step": 315200 }, { "epoch": 8.08, "learning_rate": 2.307613484988216e-05, "loss": 2.3627, "step": 315300 }, { "epoch": 8.08, "learning_rate": 2.306759572360556e-05, "loss": 2.4564, "step": 315400 }, { "epoch": 8.08, "learning_rate": 2.3059056597328963e-05, "loss": 2.5968, "step": 315500 }, { "epoch": 8.08, "learning_rate": 2.3050517471052365e-05, "loss": 2.5117, "step": 315600 }, { "epoch": 8.09, "learning_rate": 2.3041978344775763e-05, "loss": 2.5541, "step": 315700 }, { "epoch": 8.09, "learning_rate": 2.3033439218499164e-05, "loss": 2.6094, "step": 315800 }, { "epoch": 8.09, "learning_rate": 2.3024900092222566e-05, "loss": 2.5938, "step": 315900 }, { "epoch": 8.1, "learning_rate": 2.3016360965945964e-05, "loss": 2.5505, "step": 316000 }, { "epoch": 8.1, "learning_rate": 2.3007821839669366e-05, "loss": 2.5796, "step": 316100 }, { "epoch": 8.1, "learning_rate": 2.2999282713392767e-05, "loss": 2.4089, "step": 316200 }, { "epoch": 8.1, "learning_rate": 2.299074358711617e-05, "loss": 2.5582, "step": 316300 }, { "epoch": 8.11, "learning_rate": 2.2982204460839567e-05, "loss": 2.5246, "step": 316400 }, { "epoch": 8.11, "learning_rate": 2.297366533456297e-05, "loss": 2.5751, "step": 316500 }, { "epoch": 8.11, "learning_rate": 2.296512620828637e-05, "loss": 2.6564, "step": 316600 }, { "epoch": 8.11, "learning_rate": 2.295658708200977e-05, "loss": 2.552, "step": 316700 }, { "epoch": 8.12, "learning_rate": 2.294804795573317e-05, "loss": 2.5467, "step": 316800 }, { "epoch": 8.12, "learning_rate": 2.293950882945657e-05, "loss": 2.4852, "step": 316900 }, { "epoch": 8.12, "learning_rate": 2.2930969703179973e-05, "loss": 2.4988, "step": 317000 }, { "epoch": 8.12, "learning_rate": 2.2922430576903374e-05, "loss": 2.505, "step": 317100 }, { "epoch": 8.13, "learning_rate": 2.2913891450626772e-05, "loss": 2.4597, "step": 317200 }, { "epoch": 8.13, "learning_rate": 2.2905352324350174e-05, "loss": 2.4072, "step": 317300 }, { "epoch": 8.13, "learning_rate": 2.2896813198073576e-05, "loss": 2.3633, "step": 317400 }, { "epoch": 8.13, "learning_rate": 2.2888274071796974e-05, "loss": 2.6036, "step": 317500 }, { "epoch": 8.14, "learning_rate": 2.2879734945520375e-05, "loss": 2.5262, "step": 317600 }, { "epoch": 8.14, "learning_rate": 2.2871195819243777e-05, "loss": 2.722, "step": 317700 }, { "epoch": 8.14, "learning_rate": 2.286265669296718e-05, "loss": 2.6565, "step": 317800 }, { "epoch": 8.14, "learning_rate": 2.2854117566690576e-05, "loss": 2.4619, "step": 317900 }, { "epoch": 8.15, "learning_rate": 2.2845578440413978e-05, "loss": 2.5398, "step": 318000 }, { "epoch": 8.15, "learning_rate": 2.283703931413738e-05, "loss": 2.6577, "step": 318100 }, { "epoch": 8.15, "learning_rate": 2.282850018786078e-05, "loss": 2.4912, "step": 318200 }, { "epoch": 8.15, "learning_rate": 2.281996106158418e-05, "loss": 2.5452, "step": 318300 }, { "epoch": 8.16, "learning_rate": 2.281142193530758e-05, "loss": 2.6178, "step": 318400 }, { "epoch": 8.16, "learning_rate": 2.2802882809030982e-05, "loss": 2.6458, "step": 318500 }, { "epoch": 8.16, "learning_rate": 2.279434368275438e-05, "loss": 2.6709, "step": 318600 }, { "epoch": 8.16, "learning_rate": 2.2785804556477782e-05, "loss": 2.5509, "step": 318700 }, { "epoch": 8.17, "learning_rate": 2.2777265430201184e-05, "loss": 2.5017, "step": 318800 }, { "epoch": 8.17, "learning_rate": 2.2768726303924585e-05, "loss": 2.483, "step": 318900 }, { "epoch": 8.17, "learning_rate": 2.2760187177647983e-05, "loss": 2.7111, "step": 319000 }, { "epoch": 8.17, "learning_rate": 2.2751648051371385e-05, "loss": 2.5306, "step": 319100 }, { "epoch": 8.18, "learning_rate": 2.2743108925094786e-05, "loss": 2.6905, "step": 319200 }, { "epoch": 8.18, "learning_rate": 2.2734569798818188e-05, "loss": 2.5214, "step": 319300 }, { "epoch": 8.18, "learning_rate": 2.2726030672541586e-05, "loss": 2.5986, "step": 319400 }, { "epoch": 8.18, "learning_rate": 2.2717491546264988e-05, "loss": 2.4584, "step": 319500 }, { "epoch": 8.19, "learning_rate": 2.270895241998839e-05, "loss": 2.4888, "step": 319600 }, { "epoch": 8.19, "learning_rate": 2.270041329371179e-05, "loss": 2.655, "step": 319700 }, { "epoch": 8.19, "learning_rate": 2.269187416743519e-05, "loss": 2.4383, "step": 319800 }, { "epoch": 8.19, "learning_rate": 2.268333504115859e-05, "loss": 2.557, "step": 319900 }, { "epoch": 8.2, "learning_rate": 2.2674795914881992e-05, "loss": 2.6075, "step": 320000 }, { "epoch": 8.2, "learning_rate": 2.266625678860539e-05, "loss": 2.6676, "step": 320100 }, { "epoch": 8.2, "learning_rate": 2.2657717662328792e-05, "loss": 2.5704, "step": 320200 }, { "epoch": 8.21, "learning_rate": 2.2649178536052193e-05, "loss": 2.5918, "step": 320300 }, { "epoch": 8.21, "learning_rate": 2.2640639409775595e-05, "loss": 2.6089, "step": 320400 }, { "epoch": 8.21, "learning_rate": 2.2632100283498993e-05, "loss": 2.5962, "step": 320500 }, { "epoch": 8.21, "learning_rate": 2.2623561157222395e-05, "loss": 2.5116, "step": 320600 }, { "epoch": 8.22, "learning_rate": 2.2615022030945796e-05, "loss": 2.6161, "step": 320700 }, { "epoch": 8.22, "learning_rate": 2.2606482904669198e-05, "loss": 2.4271, "step": 320800 }, { "epoch": 8.22, "learning_rate": 2.2597943778392596e-05, "loss": 2.5744, "step": 320900 }, { "epoch": 8.22, "learning_rate": 2.2589404652115997e-05, "loss": 2.4381, "step": 321000 }, { "epoch": 8.23, "learning_rate": 2.25808655258394e-05, "loss": 2.4659, "step": 321100 }, { "epoch": 8.23, "learning_rate": 2.2572326399562797e-05, "loss": 2.566, "step": 321200 }, { "epoch": 8.23, "learning_rate": 2.25637872732862e-05, "loss": 2.5267, "step": 321300 }, { "epoch": 8.23, "learning_rate": 2.25552481470096e-05, "loss": 2.6958, "step": 321400 }, { "epoch": 8.24, "learning_rate": 2.2546709020733002e-05, "loss": 2.4422, "step": 321500 }, { "epoch": 8.24, "learning_rate": 2.25381698944564e-05, "loss": 2.3804, "step": 321600 }, { "epoch": 8.24, "learning_rate": 2.25296307681798e-05, "loss": 2.5386, "step": 321700 }, { "epoch": 8.24, "learning_rate": 2.2521091641903203e-05, "loss": 2.5558, "step": 321800 }, { "epoch": 8.25, "learning_rate": 2.2512552515626605e-05, "loss": 2.5719, "step": 321900 }, { "epoch": 8.25, "learning_rate": 2.2504013389350003e-05, "loss": 2.509, "step": 322000 }, { "epoch": 8.25, "learning_rate": 2.2495474263073404e-05, "loss": 2.4556, "step": 322100 }, { "epoch": 8.25, "learning_rate": 2.2486935136796806e-05, "loss": 2.6549, "step": 322200 }, { "epoch": 8.26, "learning_rate": 2.2478396010520207e-05, "loss": 2.4002, "step": 322300 }, { "epoch": 8.26, "learning_rate": 2.2469856884243605e-05, "loss": 2.4812, "step": 322400 }, { "epoch": 8.26, "learning_rate": 2.2461317757967007e-05, "loss": 2.6121, "step": 322500 }, { "epoch": 8.26, "learning_rate": 2.245277863169041e-05, "loss": 2.5983, "step": 322600 }, { "epoch": 8.27, "learning_rate": 2.2444239505413807e-05, "loss": 2.4197, "step": 322700 }, { "epoch": 8.27, "learning_rate": 2.2435700379137208e-05, "loss": 2.733, "step": 322800 }, { "epoch": 8.27, "learning_rate": 2.242716125286061e-05, "loss": 2.6397, "step": 322900 }, { "epoch": 8.27, "learning_rate": 2.241862212658401e-05, "loss": 2.3667, "step": 323000 }, { "epoch": 8.28, "learning_rate": 2.241008300030741e-05, "loss": 2.5528, "step": 323100 }, { "epoch": 8.28, "learning_rate": 2.240154387403081e-05, "loss": 2.515, "step": 323200 }, { "epoch": 8.28, "learning_rate": 2.2393004747754213e-05, "loss": 2.5222, "step": 323300 }, { "epoch": 8.28, "learning_rate": 2.2384465621477614e-05, "loss": 2.5107, "step": 323400 }, { "epoch": 8.29, "learning_rate": 2.2375926495201012e-05, "loss": 2.5237, "step": 323500 }, { "epoch": 8.29, "learning_rate": 2.2367387368924414e-05, "loss": 2.4484, "step": 323600 }, { "epoch": 8.29, "learning_rate": 2.2358848242647815e-05, "loss": 2.5927, "step": 323700 }, { "epoch": 8.29, "learning_rate": 2.2350309116371214e-05, "loss": 2.3367, "step": 323800 }, { "epoch": 8.3, "learning_rate": 2.2341769990094615e-05, "loss": 2.5768, "step": 323900 }, { "epoch": 8.3, "learning_rate": 2.2333230863818017e-05, "loss": 2.6143, "step": 324000 }, { "epoch": 8.3, "learning_rate": 2.2324691737541418e-05, "loss": 2.4861, "step": 324100 }, { "epoch": 8.31, "learning_rate": 2.2316152611264816e-05, "loss": 2.6254, "step": 324200 }, { "epoch": 8.31, "learning_rate": 2.2307613484988218e-05, "loss": 2.6897, "step": 324300 }, { "epoch": 8.31, "learning_rate": 2.2299074358711616e-05, "loss": 2.451, "step": 324400 }, { "epoch": 8.31, "learning_rate": 2.2290535232435018e-05, "loss": 2.5221, "step": 324500 }, { "epoch": 8.32, "learning_rate": 2.228199610615842e-05, "loss": 2.5239, "step": 324600 }, { "epoch": 8.32, "learning_rate": 2.2273456979881817e-05, "loss": 2.5147, "step": 324700 }, { "epoch": 8.32, "learning_rate": 2.226491785360522e-05, "loss": 2.5661, "step": 324800 }, { "epoch": 8.32, "learning_rate": 2.225637872732862e-05, "loss": 2.5493, "step": 324900 }, { "epoch": 8.33, "learning_rate": 2.224783960105202e-05, "loss": 2.731, "step": 325000 }, { "epoch": 8.33, "learning_rate": 2.223930047477542e-05, "loss": 2.5801, "step": 325100 }, { "epoch": 8.33, "learning_rate": 2.2230761348498822e-05, "loss": 2.3902, "step": 325200 }, { "epoch": 8.33, "learning_rate": 2.2222222222222223e-05, "loss": 2.6621, "step": 325300 }, { "epoch": 8.34, "learning_rate": 2.221368309594562e-05, "loss": 2.7346, "step": 325400 }, { "epoch": 8.34, "learning_rate": 2.2205143969669023e-05, "loss": 2.4395, "step": 325500 }, { "epoch": 8.34, "learning_rate": 2.2196604843392425e-05, "loss": 2.4414, "step": 325600 }, { "epoch": 8.34, "learning_rate": 2.2188065717115826e-05, "loss": 2.4889, "step": 325700 }, { "epoch": 8.35, "learning_rate": 2.2179526590839224e-05, "loss": 2.5056, "step": 325800 }, { "epoch": 8.35, "learning_rate": 2.2170987464562626e-05, "loss": 2.6284, "step": 325900 }, { "epoch": 8.35, "learning_rate": 2.2162448338286027e-05, "loss": 2.5589, "step": 326000 }, { "epoch": 8.35, "learning_rate": 2.2153909212009426e-05, "loss": 2.5782, "step": 326100 }, { "epoch": 8.36, "learning_rate": 2.2145370085732827e-05, "loss": 2.6423, "step": 326200 }, { "epoch": 8.36, "learning_rate": 2.213683095945623e-05, "loss": 2.5087, "step": 326300 }, { "epoch": 8.36, "learning_rate": 2.212829183317963e-05, "loss": 2.4429, "step": 326400 }, { "epoch": 8.36, "learning_rate": 2.211975270690303e-05, "loss": 2.5103, "step": 326500 }, { "epoch": 8.37, "learning_rate": 2.211121358062643e-05, "loss": 2.6208, "step": 326600 }, { "epoch": 8.37, "learning_rate": 2.210267445434983e-05, "loss": 2.5342, "step": 326700 }, { "epoch": 8.37, "learning_rate": 2.2094135328073233e-05, "loss": 2.3838, "step": 326800 }, { "epoch": 8.37, "learning_rate": 2.208559620179663e-05, "loss": 2.4461, "step": 326900 }, { "epoch": 8.38, "learning_rate": 2.2077057075520033e-05, "loss": 2.4148, "step": 327000 }, { "epoch": 8.38, "learning_rate": 2.2068517949243434e-05, "loss": 2.5646, "step": 327100 }, { "epoch": 8.38, "learning_rate": 2.2059978822966836e-05, "loss": 2.4863, "step": 327200 }, { "epoch": 8.38, "learning_rate": 2.2051439696690234e-05, "loss": 2.5528, "step": 327300 }, { "epoch": 8.39, "learning_rate": 2.2042900570413635e-05, "loss": 2.5117, "step": 327400 }, { "epoch": 8.39, "learning_rate": 2.2034361444137037e-05, "loss": 2.5939, "step": 327500 }, { "epoch": 8.39, "learning_rate": 2.2025822317860435e-05, "loss": 2.4854, "step": 327600 }, { "epoch": 8.39, "learning_rate": 2.2017283191583837e-05, "loss": 2.6956, "step": 327700 }, { "epoch": 8.4, "learning_rate": 2.2008744065307238e-05, "loss": 2.7891, "step": 327800 }, { "epoch": 8.4, "learning_rate": 2.200020493903064e-05, "loss": 2.6001, "step": 327900 }, { "epoch": 8.4, "learning_rate": 2.1991665812754038e-05, "loss": 2.5006, "step": 328000 }, { "epoch": 8.41, "learning_rate": 2.198312668647744e-05, "loss": 2.5285, "step": 328100 }, { "epoch": 8.41, "learning_rate": 2.197458756020084e-05, "loss": 2.3425, "step": 328200 }, { "epoch": 8.41, "learning_rate": 2.1966048433924243e-05, "loss": 2.3846, "step": 328300 }, { "epoch": 8.41, "learning_rate": 2.195750930764764e-05, "loss": 2.4363, "step": 328400 }, { "epoch": 8.42, "learning_rate": 2.1948970181371042e-05, "loss": 2.4927, "step": 328500 }, { "epoch": 8.42, "learning_rate": 2.1940431055094444e-05, "loss": 2.5043, "step": 328600 }, { "epoch": 8.42, "learning_rate": 2.1931891928817842e-05, "loss": 2.4685, "step": 328700 }, { "epoch": 8.42, "learning_rate": 2.1923352802541244e-05, "loss": 2.5255, "step": 328800 }, { "epoch": 8.43, "learning_rate": 2.1914813676264645e-05, "loss": 2.4924, "step": 328900 }, { "epoch": 8.43, "learning_rate": 2.1906274549988047e-05, "loss": 2.5157, "step": 329000 }, { "epoch": 8.43, "learning_rate": 2.1897735423711445e-05, "loss": 2.6053, "step": 329100 }, { "epoch": 8.43, "learning_rate": 2.1889196297434846e-05, "loss": 2.3203, "step": 329200 }, { "epoch": 8.44, "learning_rate": 2.1880657171158248e-05, "loss": 2.6072, "step": 329300 }, { "epoch": 8.44, "learning_rate": 2.187211804488165e-05, "loss": 2.4769, "step": 329400 }, { "epoch": 8.44, "learning_rate": 2.1863578918605048e-05, "loss": 2.7022, "step": 329500 }, { "epoch": 8.44, "learning_rate": 2.185503979232845e-05, "loss": 2.6139, "step": 329600 }, { "epoch": 8.45, "learning_rate": 2.184650066605185e-05, "loss": 2.6004, "step": 329700 }, { "epoch": 8.45, "learning_rate": 2.1837961539775252e-05, "loss": 2.5469, "step": 329800 }, { "epoch": 8.45, "learning_rate": 2.182942241349865e-05, "loss": 2.6072, "step": 329900 }, { "epoch": 8.45, "learning_rate": 2.1820883287222052e-05, "loss": 2.3999, "step": 330000 }, { "epoch": 8.46, "learning_rate": 2.1812344160945454e-05, "loss": 2.6583, "step": 330100 }, { "epoch": 8.46, "learning_rate": 2.1803805034668852e-05, "loss": 2.4339, "step": 330200 }, { "epoch": 8.46, "learning_rate": 2.1795265908392253e-05, "loss": 2.5644, "step": 330300 }, { "epoch": 8.46, "learning_rate": 2.1786726782115655e-05, "loss": 2.4128, "step": 330400 }, { "epoch": 8.47, "learning_rate": 2.1778187655839056e-05, "loss": 2.5575, "step": 330500 }, { "epoch": 8.47, "learning_rate": 2.1769648529562455e-05, "loss": 2.6073, "step": 330600 }, { "epoch": 8.47, "learning_rate": 2.1761109403285856e-05, "loss": 2.5025, "step": 330700 }, { "epoch": 8.47, "learning_rate": 2.1752570277009258e-05, "loss": 2.5701, "step": 330800 }, { "epoch": 8.48, "learning_rate": 2.174403115073266e-05, "loss": 2.5583, "step": 330900 }, { "epoch": 8.48, "learning_rate": 2.1735492024456057e-05, "loss": 2.5079, "step": 331000 }, { "epoch": 8.48, "learning_rate": 2.172695289817946e-05, "loss": 2.591, "step": 331100 }, { "epoch": 8.48, "learning_rate": 2.171841377190286e-05, "loss": 2.5121, "step": 331200 }, { "epoch": 8.49, "learning_rate": 2.1709874645626262e-05, "loss": 2.5391, "step": 331300 }, { "epoch": 8.49, "learning_rate": 2.170133551934966e-05, "loss": 2.6695, "step": 331400 }, { "epoch": 8.49, "learning_rate": 2.169279639307306e-05, "loss": 2.2796, "step": 331500 }, { "epoch": 8.49, "learning_rate": 2.1684257266796463e-05, "loss": 2.6824, "step": 331600 }, { "epoch": 8.5, "learning_rate": 2.167571814051986e-05, "loss": 2.6698, "step": 331700 }, { "epoch": 8.5, "learning_rate": 2.1667179014243263e-05, "loss": 2.5086, "step": 331800 }, { "epoch": 8.5, "learning_rate": 2.1658639887966664e-05, "loss": 2.6209, "step": 331900 }, { "epoch": 8.5, "learning_rate": 2.1650100761690066e-05, "loss": 2.5076, "step": 332000 }, { "epoch": 8.51, "learning_rate": 2.1641561635413464e-05, "loss": 2.5979, "step": 332100 }, { "epoch": 8.51, "learning_rate": 2.1633022509136866e-05, "loss": 2.4665, "step": 332200 }, { "epoch": 8.51, "learning_rate": 2.1624483382860267e-05, "loss": 2.5746, "step": 332300 }, { "epoch": 8.52, "learning_rate": 2.161594425658367e-05, "loss": 2.3524, "step": 332400 }, { "epoch": 8.52, "learning_rate": 2.1607405130307067e-05, "loss": 2.3685, "step": 332500 }, { "epoch": 8.52, "learning_rate": 2.159886600403047e-05, "loss": 2.5041, "step": 332600 }, { "epoch": 8.52, "learning_rate": 2.159032687775387e-05, "loss": 2.5543, "step": 332700 }, { "epoch": 8.53, "learning_rate": 2.1581787751477268e-05, "loss": 2.5392, "step": 332800 }, { "epoch": 8.53, "learning_rate": 2.157324862520067e-05, "loss": 2.41, "step": 332900 }, { "epoch": 8.53, "learning_rate": 2.156470949892407e-05, "loss": 2.4839, "step": 333000 }, { "epoch": 8.53, "learning_rate": 2.1556170372647473e-05, "loss": 2.5286, "step": 333100 }, { "epoch": 8.54, "learning_rate": 2.154763124637087e-05, "loss": 2.3548, "step": 333200 }, { "epoch": 8.54, "learning_rate": 2.1539092120094273e-05, "loss": 2.6003, "step": 333300 }, { "epoch": 8.54, "learning_rate": 2.1530552993817674e-05, "loss": 2.5088, "step": 333400 }, { "epoch": 8.54, "learning_rate": 2.1522013867541076e-05, "loss": 2.5957, "step": 333500 }, { "epoch": 8.55, "learning_rate": 2.1513474741264474e-05, "loss": 2.589, "step": 333600 }, { "epoch": 8.55, "learning_rate": 2.1504935614987875e-05, "loss": 2.4592, "step": 333700 }, { "epoch": 8.55, "learning_rate": 2.1496396488711277e-05, "loss": 2.5387, "step": 333800 }, { "epoch": 8.55, "learning_rate": 2.148785736243468e-05, "loss": 2.4205, "step": 333900 }, { "epoch": 8.56, "learning_rate": 2.1479318236158077e-05, "loss": 2.5185, "step": 334000 }, { "epoch": 8.56, "learning_rate": 2.1470779109881478e-05, "loss": 2.437, "step": 334100 }, { "epoch": 8.56, "learning_rate": 2.146223998360488e-05, "loss": 2.4597, "step": 334200 }, { "epoch": 8.56, "learning_rate": 2.1453700857328278e-05, "loss": 2.419, "step": 334300 }, { "epoch": 8.57, "learning_rate": 2.144516173105168e-05, "loss": 2.3632, "step": 334400 }, { "epoch": 8.57, "learning_rate": 2.143662260477508e-05, "loss": 2.458, "step": 334500 }, { "epoch": 8.57, "learning_rate": 2.1428083478498483e-05, "loss": 2.4865, "step": 334600 }, { "epoch": 8.57, "learning_rate": 2.141954435222188e-05, "loss": 2.647, "step": 334700 }, { "epoch": 8.58, "learning_rate": 2.1411005225945282e-05, "loss": 2.6055, "step": 334800 }, { "epoch": 8.58, "learning_rate": 2.1402466099668684e-05, "loss": 2.4032, "step": 334900 }, { "epoch": 8.58, "learning_rate": 2.1393926973392085e-05, "loss": 2.5299, "step": 335000 }, { "epoch": 8.58, "learning_rate": 2.1385387847115484e-05, "loss": 2.4994, "step": 335100 }, { "epoch": 8.59, "learning_rate": 2.1376848720838885e-05, "loss": 2.5819, "step": 335200 }, { "epoch": 8.59, "learning_rate": 2.1368309594562287e-05, "loss": 2.3387, "step": 335300 }, { "epoch": 8.59, "learning_rate": 2.1359770468285685e-05, "loss": 2.5313, "step": 335400 }, { "epoch": 8.59, "learning_rate": 2.1351231342009086e-05, "loss": 2.5869, "step": 335500 }, { "epoch": 8.6, "learning_rate": 2.1342692215732488e-05, "loss": 2.475, "step": 335600 }, { "epoch": 8.6, "learning_rate": 2.133415308945589e-05, "loss": 2.5618, "step": 335700 }, { "epoch": 8.6, "learning_rate": 2.1325613963179288e-05, "loss": 2.6261, "step": 335800 }, { "epoch": 8.6, "learning_rate": 2.131707483690269e-05, "loss": 2.6525, "step": 335900 }, { "epoch": 8.61, "learning_rate": 2.130853571062609e-05, "loss": 2.67, "step": 336000 }, { "epoch": 8.61, "learning_rate": 2.1299996584349492e-05, "loss": 2.5276, "step": 336100 }, { "epoch": 8.61, "learning_rate": 2.129145745807289e-05, "loss": 2.5303, "step": 336200 }, { "epoch": 8.62, "learning_rate": 2.1282918331796292e-05, "loss": 2.4886, "step": 336300 }, { "epoch": 8.62, "learning_rate": 2.1274379205519693e-05, "loss": 2.4955, "step": 336400 }, { "epoch": 8.62, "learning_rate": 2.1265840079243095e-05, "loss": 2.5971, "step": 336500 }, { "epoch": 8.62, "learning_rate": 2.1257300952966493e-05, "loss": 2.5558, "step": 336600 }, { "epoch": 8.63, "learning_rate": 2.1248761826689895e-05, "loss": 2.5548, "step": 336700 }, { "epoch": 8.63, "learning_rate": 2.1240222700413296e-05, "loss": 2.4898, "step": 336800 }, { "epoch": 8.63, "learning_rate": 2.1231683574136694e-05, "loss": 2.5943, "step": 336900 }, { "epoch": 8.63, "learning_rate": 2.1223144447860096e-05, "loss": 2.5476, "step": 337000 }, { "epoch": 8.64, "learning_rate": 2.1214605321583498e-05, "loss": 2.3956, "step": 337100 }, { "epoch": 8.64, "learning_rate": 2.12060661953069e-05, "loss": 2.5946, "step": 337200 }, { "epoch": 8.64, "learning_rate": 2.1197527069030297e-05, "loss": 2.5176, "step": 337300 }, { "epoch": 8.64, "learning_rate": 2.11889879427537e-05, "loss": 2.5441, "step": 337400 }, { "epoch": 8.65, "learning_rate": 2.11804488164771e-05, "loss": 2.4069, "step": 337500 }, { "epoch": 8.65, "learning_rate": 2.1171909690200502e-05, "loss": 2.4024, "step": 337600 }, { "epoch": 8.65, "learning_rate": 2.11633705639239e-05, "loss": 2.5113, "step": 337700 }, { "epoch": 8.65, "learning_rate": 2.11548314376473e-05, "loss": 2.5701, "step": 337800 }, { "epoch": 8.66, "learning_rate": 2.1146292311370703e-05, "loss": 2.4322, "step": 337900 }, { "epoch": 8.66, "learning_rate": 2.11377531850941e-05, "loss": 2.4589, "step": 338000 }, { "epoch": 8.66, "learning_rate": 2.1129214058817503e-05, "loss": 2.6578, "step": 338100 }, { "epoch": 8.66, "learning_rate": 2.1120674932540904e-05, "loss": 2.4323, "step": 338200 }, { "epoch": 8.67, "learning_rate": 2.1112135806264306e-05, "loss": 2.4691, "step": 338300 }, { "epoch": 8.67, "learning_rate": 2.1103596679987704e-05, "loss": 2.5593, "step": 338400 }, { "epoch": 8.67, "learning_rate": 2.1095057553711106e-05, "loss": 2.6265, "step": 338500 }, { "epoch": 8.67, "learning_rate": 2.1086518427434507e-05, "loss": 2.6088, "step": 338600 }, { "epoch": 8.68, "learning_rate": 2.107797930115791e-05, "loss": 2.1967, "step": 338700 }, { "epoch": 8.68, "learning_rate": 2.1069440174881307e-05, "loss": 2.5874, "step": 338800 }, { "epoch": 8.68, "learning_rate": 2.106090104860471e-05, "loss": 2.3878, "step": 338900 }, { "epoch": 8.68, "learning_rate": 2.105236192232811e-05, "loss": 2.4317, "step": 339000 }, { "epoch": 8.69, "learning_rate": 2.104382279605151e-05, "loss": 2.4738, "step": 339100 }, { "epoch": 8.69, "learning_rate": 2.103528366977491e-05, "loss": 2.5272, "step": 339200 }, { "epoch": 8.69, "learning_rate": 2.102674454349831e-05, "loss": 2.5015, "step": 339300 }, { "epoch": 8.69, "learning_rate": 2.1018205417221713e-05, "loss": 2.521, "step": 339400 }, { "epoch": 8.7, "learning_rate": 2.100966629094511e-05, "loss": 2.5521, "step": 339500 }, { "epoch": 8.7, "learning_rate": 2.1001127164668513e-05, "loss": 2.58, "step": 339600 }, { "epoch": 8.7, "learning_rate": 2.0992588038391914e-05, "loss": 2.5704, "step": 339700 }, { "epoch": 8.7, "learning_rate": 2.0984048912115316e-05, "loss": 2.588, "step": 339800 }, { "epoch": 8.71, "learning_rate": 2.0975509785838714e-05, "loss": 2.6017, "step": 339900 }, { "epoch": 8.71, "learning_rate": 2.0966970659562115e-05, "loss": 2.5664, "step": 340000 }, { "epoch": 8.71, "learning_rate": 2.0958431533285517e-05, "loss": 2.493, "step": 340100 }, { "epoch": 8.72, "learning_rate": 2.094989240700892e-05, "loss": 2.6741, "step": 340200 }, { "epoch": 8.72, "learning_rate": 2.0941353280732317e-05, "loss": 2.2934, "step": 340300 }, { "epoch": 8.72, "learning_rate": 2.0932814154455718e-05, "loss": 2.4713, "step": 340400 }, { "epoch": 8.72, "learning_rate": 2.092427502817912e-05, "loss": 2.6073, "step": 340500 }, { "epoch": 8.73, "learning_rate": 2.0915735901902518e-05, "loss": 2.5486, "step": 340600 }, { "epoch": 8.73, "learning_rate": 2.090719677562592e-05, "loss": 2.5145, "step": 340700 }, { "epoch": 8.73, "learning_rate": 2.089865764934932e-05, "loss": 2.592, "step": 340800 }, { "epoch": 8.73, "learning_rate": 2.0890118523072723e-05, "loss": 2.7031, "step": 340900 }, { "epoch": 8.74, "learning_rate": 2.088157939679612e-05, "loss": 2.3675, "step": 341000 }, { "epoch": 8.74, "learning_rate": 2.0873040270519522e-05, "loss": 2.6031, "step": 341100 }, { "epoch": 8.74, "learning_rate": 2.0864501144242924e-05, "loss": 2.4581, "step": 341200 }, { "epoch": 8.74, "learning_rate": 2.0855962017966325e-05, "loss": 2.6458, "step": 341300 }, { "epoch": 8.75, "learning_rate": 2.0847422891689723e-05, "loss": 2.6926, "step": 341400 }, { "epoch": 8.75, "learning_rate": 2.0838883765413125e-05, "loss": 2.5385, "step": 341500 }, { "epoch": 8.75, "learning_rate": 2.0830344639136527e-05, "loss": 2.5288, "step": 341600 }, { "epoch": 8.75, "learning_rate": 2.0821805512859928e-05, "loss": 2.4888, "step": 341700 }, { "epoch": 8.76, "learning_rate": 2.0813266386583326e-05, "loss": 2.5076, "step": 341800 }, { "epoch": 8.76, "learning_rate": 2.0804727260306724e-05, "loss": 2.3994, "step": 341900 }, { "epoch": 8.76, "learning_rate": 2.0796188134030126e-05, "loss": 2.6172, "step": 342000 }, { "epoch": 8.76, "learning_rate": 2.0787649007753528e-05, "loss": 2.5673, "step": 342100 }, { "epoch": 8.77, "learning_rate": 2.0779109881476926e-05, "loss": 2.519, "step": 342200 }, { "epoch": 8.77, "learning_rate": 2.0770570755200327e-05, "loss": 2.6818, "step": 342300 }, { "epoch": 8.77, "learning_rate": 2.076203162892373e-05, "loss": 2.4862, "step": 342400 }, { "epoch": 8.77, "learning_rate": 2.075349250264713e-05, "loss": 2.4133, "step": 342500 }, { "epoch": 8.78, "learning_rate": 2.074495337637053e-05, "loss": 2.6491, "step": 342600 }, { "epoch": 8.78, "learning_rate": 2.073641425009393e-05, "loss": 2.3798, "step": 342700 }, { "epoch": 8.78, "learning_rate": 2.072787512381733e-05, "loss": 2.4769, "step": 342800 }, { "epoch": 8.78, "learning_rate": 2.071933599754073e-05, "loss": 2.5758, "step": 342900 }, { "epoch": 8.79, "learning_rate": 2.071079687126413e-05, "loss": 2.5529, "step": 343000 }, { "epoch": 8.79, "learning_rate": 2.0702257744987533e-05, "loss": 2.288, "step": 343100 }, { "epoch": 8.79, "learning_rate": 2.0693718618710934e-05, "loss": 2.5995, "step": 343200 }, { "epoch": 8.79, "learning_rate": 2.0685179492434333e-05, "loss": 2.4792, "step": 343300 }, { "epoch": 8.8, "learning_rate": 2.0676640366157734e-05, "loss": 2.4033, "step": 343400 }, { "epoch": 8.8, "learning_rate": 2.0668101239881136e-05, "loss": 2.5598, "step": 343500 }, { "epoch": 8.8, "learning_rate": 2.0659562113604537e-05, "loss": 2.5351, "step": 343600 }, { "epoch": 8.8, "learning_rate": 2.0651022987327935e-05, "loss": 2.3781, "step": 343700 }, { "epoch": 8.81, "learning_rate": 2.0642483861051337e-05, "loss": 2.2974, "step": 343800 }, { "epoch": 8.81, "learning_rate": 2.063394473477474e-05, "loss": 2.5745, "step": 343900 }, { "epoch": 8.81, "learning_rate": 2.062540560849814e-05, "loss": 2.4677, "step": 344000 }, { "epoch": 8.81, "learning_rate": 2.0616866482221538e-05, "loss": 2.4528, "step": 344100 }, { "epoch": 8.82, "learning_rate": 2.060832735594494e-05, "loss": 2.524, "step": 344200 }, { "epoch": 8.82, "learning_rate": 2.059978822966834e-05, "loss": 2.5958, "step": 344300 }, { "epoch": 8.82, "learning_rate": 2.059124910339174e-05, "loss": 2.6228, "step": 344400 }, { "epoch": 8.83, "learning_rate": 2.058270997711514e-05, "loss": 2.5172, "step": 344500 }, { "epoch": 8.83, "learning_rate": 2.0574170850838543e-05, "loss": 2.5492, "step": 344600 }, { "epoch": 8.83, "learning_rate": 2.0565631724561944e-05, "loss": 2.5259, "step": 344700 }, { "epoch": 8.83, "learning_rate": 2.0557092598285342e-05, "loss": 2.494, "step": 344800 }, { "epoch": 8.84, "learning_rate": 2.0548553472008744e-05, "loss": 2.601, "step": 344900 }, { "epoch": 8.84, "learning_rate": 2.0540014345732145e-05, "loss": 2.4916, "step": 345000 }, { "epoch": 8.84, "learning_rate": 2.0531475219455547e-05, "loss": 2.6532, "step": 345100 }, { "epoch": 8.84, "learning_rate": 2.0522936093178945e-05, "loss": 2.574, "step": 345200 }, { "epoch": 8.85, "learning_rate": 2.0514396966902347e-05, "loss": 2.483, "step": 345300 }, { "epoch": 8.85, "learning_rate": 2.0505857840625748e-05, "loss": 2.4472, "step": 345400 }, { "epoch": 8.85, "learning_rate": 2.0497318714349146e-05, "loss": 2.5283, "step": 345500 }, { "epoch": 8.85, "learning_rate": 2.0488779588072548e-05, "loss": 2.4242, "step": 345600 }, { "epoch": 8.86, "learning_rate": 2.048024046179595e-05, "loss": 2.311, "step": 345700 }, { "epoch": 8.86, "learning_rate": 2.047170133551935e-05, "loss": 2.3901, "step": 345800 }, { "epoch": 8.86, "learning_rate": 2.046316220924275e-05, "loss": 2.4731, "step": 345900 }, { "epoch": 8.86, "learning_rate": 2.045462308296615e-05, "loss": 2.423, "step": 346000 }, { "epoch": 8.87, "learning_rate": 2.0446083956689552e-05, "loss": 2.5762, "step": 346100 }, { "epoch": 8.87, "learning_rate": 2.0437544830412954e-05, "loss": 2.5543, "step": 346200 }, { "epoch": 8.87, "learning_rate": 2.0429005704136352e-05, "loss": 2.5611, "step": 346300 }, { "epoch": 8.87, "learning_rate": 2.0420466577859753e-05, "loss": 2.5478, "step": 346400 }, { "epoch": 8.88, "learning_rate": 2.0411927451583155e-05, "loss": 2.5788, "step": 346500 }, { "epoch": 8.88, "learning_rate": 2.0403388325306557e-05, "loss": 2.6245, "step": 346600 }, { "epoch": 8.88, "learning_rate": 2.0394849199029955e-05, "loss": 2.4401, "step": 346700 }, { "epoch": 8.88, "learning_rate": 2.0386310072753356e-05, "loss": 2.3803, "step": 346800 }, { "epoch": 8.89, "learning_rate": 2.0377770946476758e-05, "loss": 2.4374, "step": 346900 }, { "epoch": 8.89, "learning_rate": 2.0369231820200156e-05, "loss": 2.7678, "step": 347000 }, { "epoch": 8.89, "learning_rate": 2.0360692693923558e-05, "loss": 2.513, "step": 347100 }, { "epoch": 8.89, "learning_rate": 2.035215356764696e-05, "loss": 2.646, "step": 347200 }, { "epoch": 8.9, "learning_rate": 2.034361444137036e-05, "loss": 2.4675, "step": 347300 }, { "epoch": 8.9, "learning_rate": 2.033507531509376e-05, "loss": 2.5015, "step": 347400 }, { "epoch": 8.9, "learning_rate": 2.032653618881716e-05, "loss": 2.3507, "step": 347500 }, { "epoch": 8.9, "learning_rate": 2.0317997062540562e-05, "loss": 2.4468, "step": 347600 }, { "epoch": 8.91, "learning_rate": 2.0309457936263963e-05, "loss": 2.4786, "step": 347700 }, { "epoch": 8.91, "learning_rate": 2.030091880998736e-05, "loss": 2.5348, "step": 347800 }, { "epoch": 8.91, "learning_rate": 2.0292379683710763e-05, "loss": 2.6023, "step": 347900 }, { "epoch": 8.91, "learning_rate": 2.0283840557434165e-05, "loss": 2.3332, "step": 348000 }, { "epoch": 8.92, "learning_rate": 2.0275301431157566e-05, "loss": 2.6247, "step": 348100 }, { "epoch": 8.92, "learning_rate": 2.0266762304880964e-05, "loss": 2.5805, "step": 348200 }, { "epoch": 8.92, "learning_rate": 2.0258223178604366e-05, "loss": 2.4321, "step": 348300 }, { "epoch": 8.93, "learning_rate": 2.0249684052327767e-05, "loss": 2.3058, "step": 348400 }, { "epoch": 8.93, "learning_rate": 2.0241144926051166e-05, "loss": 2.2775, "step": 348500 }, { "epoch": 8.93, "learning_rate": 2.0232605799774567e-05, "loss": 2.6537, "step": 348600 }, { "epoch": 8.93, "learning_rate": 2.022406667349797e-05, "loss": 2.6038, "step": 348700 }, { "epoch": 8.94, "learning_rate": 2.021552754722137e-05, "loss": 2.4554, "step": 348800 }, { "epoch": 8.94, "learning_rate": 2.020698842094477e-05, "loss": 2.4168, "step": 348900 }, { "epoch": 8.94, "learning_rate": 2.019844929466817e-05, "loss": 2.4945, "step": 349000 }, { "epoch": 8.94, "learning_rate": 2.018991016839157e-05, "loss": 2.4791, "step": 349100 }, { "epoch": 8.95, "learning_rate": 2.0181371042114973e-05, "loss": 2.3889, "step": 349200 }, { "epoch": 8.95, "learning_rate": 2.017283191583837e-05, "loss": 2.4123, "step": 349300 }, { "epoch": 8.95, "learning_rate": 2.0164292789561773e-05, "loss": 2.4595, "step": 349400 }, { "epoch": 8.95, "learning_rate": 2.0155753663285174e-05, "loss": 2.3611, "step": 349500 }, { "epoch": 8.96, "learning_rate": 2.0147214537008572e-05, "loss": 2.5504, "step": 349600 }, { "epoch": 8.96, "learning_rate": 2.0138675410731974e-05, "loss": 2.4024, "step": 349700 }, { "epoch": 8.96, "learning_rate": 2.0130136284455376e-05, "loss": 2.6099, "step": 349800 }, { "epoch": 8.96, "learning_rate": 2.0121597158178777e-05, "loss": 2.5468, "step": 349900 }, { "epoch": 8.97, "learning_rate": 2.0113058031902175e-05, "loss": 2.3558, "step": 350000 }, { "epoch": 8.97, "learning_rate": 2.0104518905625577e-05, "loss": 2.5213, "step": 350100 }, { "epoch": 8.97, "learning_rate": 2.009597977934898e-05, "loss": 2.3691, "step": 350200 }, { "epoch": 8.97, "learning_rate": 2.008744065307238e-05, "loss": 2.6007, "step": 350300 }, { "epoch": 8.98, "learning_rate": 2.0078901526795778e-05, "loss": 2.5096, "step": 350400 }, { "epoch": 8.98, "learning_rate": 2.007036240051918e-05, "loss": 2.5878, "step": 350500 }, { "epoch": 8.98, "learning_rate": 2.006182327424258e-05, "loss": 2.6019, "step": 350600 }, { "epoch": 8.98, "learning_rate": 2.0053284147965983e-05, "loss": 2.5281, "step": 350700 }, { "epoch": 8.99, "learning_rate": 2.004474502168938e-05, "loss": 2.3759, "step": 350800 }, { "epoch": 8.99, "learning_rate": 2.0036205895412782e-05, "loss": 2.4643, "step": 350900 }, { "epoch": 8.99, "learning_rate": 2.0027666769136184e-05, "loss": 2.2171, "step": 351000 }, { "epoch": 8.99, "learning_rate": 2.0019127642859582e-05, "loss": 2.5775, "step": 351100 }, { "epoch": 9.0, "learning_rate": 2.0010588516582984e-05, "loss": 2.36, "step": 351200 }, { "epoch": 9.0, "learning_rate": 2.0002049390306385e-05, "loss": 2.3243, "step": 351300 }, { "epoch": 9.0, "learning_rate": 1.9993510264029787e-05, "loss": 2.4882, "step": 351400 }, { "epoch": 9.0, "learning_rate": 1.9984971137753185e-05, "loss": 2.5443, "step": 351500 }, { "epoch": 9.01, "learning_rate": 1.9976432011476587e-05, "loss": 2.486, "step": 351600 }, { "epoch": 9.01, "learning_rate": 1.9967892885199988e-05, "loss": 2.3474, "step": 351700 }, { "epoch": 9.01, "learning_rate": 1.995935375892339e-05, "loss": 2.3448, "step": 351800 }, { "epoch": 9.01, "learning_rate": 1.9950814632646788e-05, "loss": 2.4835, "step": 351900 }, { "epoch": 9.02, "learning_rate": 1.994227550637019e-05, "loss": 2.3045, "step": 352000 }, { "epoch": 9.02, "learning_rate": 1.993373638009359e-05, "loss": 2.5893, "step": 352100 }, { "epoch": 9.02, "learning_rate": 1.992519725381699e-05, "loss": 2.5106, "step": 352200 }, { "epoch": 9.03, "learning_rate": 1.991665812754039e-05, "loss": 2.4664, "step": 352300 }, { "epoch": 9.03, "learning_rate": 1.9908119001263792e-05, "loss": 2.5472, "step": 352400 }, { "epoch": 9.03, "learning_rate": 1.9899579874987194e-05, "loss": 2.6176, "step": 352500 }, { "epoch": 9.03, "learning_rate": 1.9891040748710592e-05, "loss": 2.5066, "step": 352600 }, { "epoch": 9.04, "learning_rate": 1.9882501622433993e-05, "loss": 2.4152, "step": 352700 }, { "epoch": 9.04, "learning_rate": 1.9873962496157395e-05, "loss": 2.2691, "step": 352800 }, { "epoch": 9.04, "learning_rate": 1.9865423369880796e-05, "loss": 2.4565, "step": 352900 }, { "epoch": 9.04, "learning_rate": 1.9856884243604195e-05, "loss": 2.4942, "step": 353000 }, { "epoch": 9.05, "learning_rate": 1.9848345117327596e-05, "loss": 2.5106, "step": 353100 }, { "epoch": 9.05, "learning_rate": 1.9839805991050998e-05, "loss": 2.4466, "step": 353200 }, { "epoch": 9.05, "learning_rate": 1.98312668647744e-05, "loss": 2.5558, "step": 353300 }, { "epoch": 9.05, "learning_rate": 1.9822727738497797e-05, "loss": 2.7191, "step": 353400 }, { "epoch": 9.06, "learning_rate": 1.98141886122212e-05, "loss": 2.5976, "step": 353500 }, { "epoch": 9.06, "learning_rate": 1.98056494859446e-05, "loss": 2.425, "step": 353600 }, { "epoch": 9.06, "learning_rate": 1.9797110359668e-05, "loss": 2.3932, "step": 353700 }, { "epoch": 9.06, "learning_rate": 1.97885712333914e-05, "loss": 2.4325, "step": 353800 }, { "epoch": 9.07, "learning_rate": 1.9780032107114802e-05, "loss": 2.3956, "step": 353900 }, { "epoch": 9.07, "learning_rate": 1.9771492980838203e-05, "loss": 2.5554, "step": 354000 }, { "epoch": 9.07, "learning_rate": 1.97629538545616e-05, "loss": 2.3288, "step": 354100 }, { "epoch": 9.07, "learning_rate": 1.9754414728285003e-05, "loss": 2.5212, "step": 354200 }, { "epoch": 9.08, "learning_rate": 1.9745875602008405e-05, "loss": 2.4385, "step": 354300 }, { "epoch": 9.08, "learning_rate": 1.9737336475731806e-05, "loss": 2.5364, "step": 354400 }, { "epoch": 9.08, "learning_rate": 1.9728797349455204e-05, "loss": 2.5373, "step": 354500 }, { "epoch": 9.08, "learning_rate": 1.9720258223178606e-05, "loss": 2.3519, "step": 354600 }, { "epoch": 9.09, "learning_rate": 1.9711719096902007e-05, "loss": 2.6551, "step": 354700 }, { "epoch": 9.09, "learning_rate": 1.9703179970625406e-05, "loss": 2.4575, "step": 354800 }, { "epoch": 9.09, "learning_rate": 1.9694640844348807e-05, "loss": 2.3641, "step": 354900 }, { "epoch": 9.09, "learning_rate": 1.968610171807221e-05, "loss": 2.5151, "step": 355000 }, { "epoch": 9.1, "learning_rate": 1.967756259179561e-05, "loss": 2.4773, "step": 355100 }, { "epoch": 9.1, "learning_rate": 1.966902346551901e-05, "loss": 2.3488, "step": 355200 }, { "epoch": 9.1, "learning_rate": 1.966048433924241e-05, "loss": 2.5454, "step": 355300 }, { "epoch": 9.1, "learning_rate": 1.965194521296581e-05, "loss": 2.38, "step": 355400 }, { "epoch": 9.11, "learning_rate": 1.9643406086689213e-05, "loss": 2.3771, "step": 355500 }, { "epoch": 9.11, "learning_rate": 1.963486696041261e-05, "loss": 2.5326, "step": 355600 }, { "epoch": 9.11, "learning_rate": 1.9626327834136013e-05, "loss": 2.4057, "step": 355700 }, { "epoch": 9.11, "learning_rate": 1.9617788707859414e-05, "loss": 2.2275, "step": 355800 }, { "epoch": 9.12, "learning_rate": 1.9609249581582816e-05, "loss": 2.4473, "step": 355900 }, { "epoch": 9.12, "learning_rate": 1.9600710455306214e-05, "loss": 2.3546, "step": 356000 }, { "epoch": 9.12, "learning_rate": 1.9592171329029616e-05, "loss": 2.3716, "step": 356100 }, { "epoch": 9.12, "learning_rate": 1.9583632202753017e-05, "loss": 2.4092, "step": 356200 }, { "epoch": 9.13, "learning_rate": 1.9575093076476415e-05, "loss": 2.4315, "step": 356300 }, { "epoch": 9.13, "learning_rate": 1.9566553950199817e-05, "loss": 2.454, "step": 356400 }, { "epoch": 9.13, "learning_rate": 1.955801482392322e-05, "loss": 2.5591, "step": 356500 }, { "epoch": 9.14, "learning_rate": 1.954947569764662e-05, "loss": 2.4718, "step": 356600 }, { "epoch": 9.14, "learning_rate": 1.9540936571370018e-05, "loss": 2.2909, "step": 356700 }, { "epoch": 9.14, "learning_rate": 1.953239744509342e-05, "loss": 2.6231, "step": 356800 }, { "epoch": 9.14, "learning_rate": 1.952385831881682e-05, "loss": 2.3351, "step": 356900 }, { "epoch": 9.15, "learning_rate": 1.9515319192540223e-05, "loss": 2.5459, "step": 357000 }, { "epoch": 9.15, "learning_rate": 1.950678006626362e-05, "loss": 2.5014, "step": 357100 }, { "epoch": 9.15, "learning_rate": 1.9498240939987022e-05, "loss": 2.5505, "step": 357200 }, { "epoch": 9.15, "learning_rate": 1.9489701813710424e-05, "loss": 2.3888, "step": 357300 }, { "epoch": 9.16, "learning_rate": 1.9481162687433822e-05, "loss": 2.5272, "step": 357400 }, { "epoch": 9.16, "learning_rate": 1.9472623561157224e-05, "loss": 2.4352, "step": 357500 }, { "epoch": 9.16, "learning_rate": 1.9464084434880625e-05, "loss": 2.4541, "step": 357600 }, { "epoch": 9.16, "learning_rate": 1.9455545308604027e-05, "loss": 2.2915, "step": 357700 }, { "epoch": 9.17, "learning_rate": 1.9447006182327425e-05, "loss": 2.5464, "step": 357800 }, { "epoch": 9.17, "learning_rate": 1.9438467056050826e-05, "loss": 2.3428, "step": 357900 }, { "epoch": 9.17, "learning_rate": 1.9429927929774228e-05, "loss": 2.3803, "step": 358000 }, { "epoch": 9.17, "learning_rate": 1.942138880349763e-05, "loss": 2.4407, "step": 358100 }, { "epoch": 9.18, "learning_rate": 1.9412849677221028e-05, "loss": 2.4178, "step": 358200 }, { "epoch": 9.18, "learning_rate": 1.940431055094443e-05, "loss": 2.5146, "step": 358300 }, { "epoch": 9.18, "learning_rate": 1.939577142466783e-05, "loss": 2.4201, "step": 358400 }, { "epoch": 9.18, "learning_rate": 1.9387232298391232e-05, "loss": 2.5639, "step": 358500 }, { "epoch": 9.19, "learning_rate": 1.937869317211463e-05, "loss": 2.4283, "step": 358600 }, { "epoch": 9.19, "learning_rate": 1.9370154045838032e-05, "loss": 2.4783, "step": 358700 }, { "epoch": 9.19, "learning_rate": 1.9361614919561434e-05, "loss": 2.5163, "step": 358800 }, { "epoch": 9.19, "learning_rate": 1.9353075793284832e-05, "loss": 2.3794, "step": 358900 }, { "epoch": 9.2, "learning_rate": 1.9344536667008233e-05, "loss": 2.6358, "step": 359000 }, { "epoch": 9.2, "learning_rate": 1.9335997540731635e-05, "loss": 2.5169, "step": 359100 }, { "epoch": 9.2, "learning_rate": 1.9327458414455036e-05, "loss": 2.3942, "step": 359200 }, { "epoch": 9.2, "learning_rate": 1.9318919288178435e-05, "loss": 2.5266, "step": 359300 }, { "epoch": 9.21, "learning_rate": 1.9310380161901833e-05, "loss": 2.4013, "step": 359400 }, { "epoch": 9.21, "learning_rate": 1.9301841035625234e-05, "loss": 2.4278, "step": 359500 }, { "epoch": 9.21, "learning_rate": 1.9293301909348636e-05, "loss": 2.3956, "step": 359600 }, { "epoch": 9.21, "learning_rate": 1.9284762783072034e-05, "loss": 2.4784, "step": 359700 }, { "epoch": 9.22, "learning_rate": 1.9276223656795436e-05, "loss": 2.4385, "step": 359800 }, { "epoch": 9.22, "learning_rate": 1.9267684530518837e-05, "loss": 2.4665, "step": 359900 }, { "epoch": 9.22, "learning_rate": 1.925914540424224e-05, "loss": 2.369, "step": 360000 }, { "epoch": 9.22, "learning_rate": 1.9250606277965637e-05, "loss": 2.4647, "step": 360100 }, { "epoch": 9.23, "learning_rate": 1.924206715168904e-05, "loss": 2.4181, "step": 360200 }, { "epoch": 9.23, "learning_rate": 1.923352802541244e-05, "loss": 2.3927, "step": 360300 }, { "epoch": 9.23, "learning_rate": 1.922498889913584e-05, "loss": 2.4444, "step": 360400 }, { "epoch": 9.24, "learning_rate": 1.921644977285924e-05, "loss": 2.6982, "step": 360500 }, { "epoch": 9.24, "learning_rate": 1.920791064658264e-05, "loss": 2.6013, "step": 360600 }, { "epoch": 9.24, "learning_rate": 1.9199371520306043e-05, "loss": 2.4856, "step": 360700 }, { "epoch": 9.24, "learning_rate": 1.9190832394029444e-05, "loss": 2.5226, "step": 360800 }, { "epoch": 9.25, "learning_rate": 1.9182293267752842e-05, "loss": 2.4428, "step": 360900 }, { "epoch": 9.25, "learning_rate": 1.9173754141476244e-05, "loss": 2.5025, "step": 361000 }, { "epoch": 9.25, "learning_rate": 1.9165215015199646e-05, "loss": 2.4364, "step": 361100 }, { "epoch": 9.25, "learning_rate": 1.9156675888923044e-05, "loss": 2.3382, "step": 361200 }, { "epoch": 9.26, "learning_rate": 1.9148136762646445e-05, "loss": 2.3355, "step": 361300 }, { "epoch": 9.26, "learning_rate": 1.9139597636369847e-05, "loss": 2.3755, "step": 361400 }, { "epoch": 9.26, "learning_rate": 1.913105851009325e-05, "loss": 2.5798, "step": 361500 }, { "epoch": 9.26, "learning_rate": 1.9122519383816646e-05, "loss": 2.5586, "step": 361600 }, { "epoch": 9.27, "learning_rate": 1.9113980257540048e-05, "loss": 2.5779, "step": 361700 }, { "epoch": 9.27, "learning_rate": 1.910544113126345e-05, "loss": 2.4358, "step": 361800 }, { "epoch": 9.27, "learning_rate": 1.909690200498685e-05, "loss": 2.4411, "step": 361900 }, { "epoch": 9.27, "learning_rate": 1.908836287871025e-05, "loss": 2.556, "step": 362000 }, { "epoch": 9.28, "learning_rate": 1.907982375243365e-05, "loss": 2.4819, "step": 362100 }, { "epoch": 9.28, "learning_rate": 1.9071284626157052e-05, "loss": 2.3784, "step": 362200 }, { "epoch": 9.28, "learning_rate": 1.906274549988045e-05, "loss": 2.2813, "step": 362300 }, { "epoch": 9.28, "learning_rate": 1.9054206373603852e-05, "loss": 2.3302, "step": 362400 }, { "epoch": 9.29, "learning_rate": 1.9045667247327254e-05, "loss": 2.5037, "step": 362500 }, { "epoch": 9.29, "learning_rate": 1.9037128121050655e-05, "loss": 2.5283, "step": 362600 }, { "epoch": 9.29, "learning_rate": 1.9028588994774053e-05, "loss": 2.3458, "step": 362700 }, { "epoch": 9.29, "learning_rate": 1.9020049868497455e-05, "loss": 2.4362, "step": 362800 }, { "epoch": 9.3, "learning_rate": 1.9011510742220856e-05, "loss": 2.5364, "step": 362900 }, { "epoch": 9.3, "learning_rate": 1.9002971615944258e-05, "loss": 2.3834, "step": 363000 }, { "epoch": 9.3, "learning_rate": 1.8994432489667656e-05, "loss": 2.4197, "step": 363100 }, { "epoch": 9.3, "learning_rate": 1.8985893363391058e-05, "loss": 2.4561, "step": 363200 }, { "epoch": 9.31, "learning_rate": 1.897735423711446e-05, "loss": 2.4097, "step": 363300 }, { "epoch": 9.31, "learning_rate": 1.896881511083786e-05, "loss": 2.4191, "step": 363400 }, { "epoch": 9.31, "learning_rate": 1.896027598456126e-05, "loss": 2.3627, "step": 363500 }, { "epoch": 9.31, "learning_rate": 1.895173685828466e-05, "loss": 2.5534, "step": 363600 }, { "epoch": 9.32, "learning_rate": 1.8943197732008062e-05, "loss": 2.3401, "step": 363700 }, { "epoch": 9.32, "learning_rate": 1.893465860573146e-05, "loss": 2.5124, "step": 363800 }, { "epoch": 9.32, "learning_rate": 1.8926119479454862e-05, "loss": 2.5377, "step": 363900 }, { "epoch": 9.32, "learning_rate": 1.8917580353178263e-05, "loss": 2.4968, "step": 364000 }, { "epoch": 9.33, "learning_rate": 1.8909041226901665e-05, "loss": 2.5767, "step": 364100 }, { "epoch": 9.33, "learning_rate": 1.8900502100625063e-05, "loss": 2.46, "step": 364200 }, { "epoch": 9.33, "learning_rate": 1.8891962974348465e-05, "loss": 2.2387, "step": 364300 }, { "epoch": 9.33, "learning_rate": 1.8883423848071866e-05, "loss": 2.4068, "step": 364400 }, { "epoch": 9.34, "learning_rate": 1.8874884721795268e-05, "loss": 2.5661, "step": 364500 }, { "epoch": 9.34, "learning_rate": 1.8866345595518666e-05, "loss": 2.5201, "step": 364600 }, { "epoch": 9.34, "learning_rate": 1.8857806469242067e-05, "loss": 2.4884, "step": 364700 }, { "epoch": 9.35, "learning_rate": 1.884926734296547e-05, "loss": 2.6423, "step": 364800 }, { "epoch": 9.35, "learning_rate": 1.884072821668887e-05, "loss": 2.5374, "step": 364900 }, { "epoch": 9.35, "learning_rate": 1.883218909041227e-05, "loss": 2.3762, "step": 365000 }, { "epoch": 9.35, "learning_rate": 1.882364996413567e-05, "loss": 2.4098, "step": 365100 }, { "epoch": 9.36, "learning_rate": 1.8815110837859072e-05, "loss": 2.5898, "step": 365200 }, { "epoch": 9.36, "learning_rate": 1.880657171158247e-05, "loss": 2.3937, "step": 365300 }, { "epoch": 9.36, "learning_rate": 1.879803258530587e-05, "loss": 2.5132, "step": 365400 }, { "epoch": 9.36, "learning_rate": 1.8789493459029273e-05, "loss": 2.3442, "step": 365500 }, { "epoch": 9.37, "learning_rate": 1.8780954332752675e-05, "loss": 2.4829, "step": 365600 }, { "epoch": 9.37, "learning_rate": 1.8772415206476073e-05, "loss": 2.5117, "step": 365700 }, { "epoch": 9.37, "learning_rate": 1.8763876080199474e-05, "loss": 2.4341, "step": 365800 }, { "epoch": 9.37, "learning_rate": 1.8755336953922876e-05, "loss": 2.66, "step": 365900 }, { "epoch": 9.38, "learning_rate": 1.8746797827646277e-05, "loss": 2.3417, "step": 366000 }, { "epoch": 9.38, "learning_rate": 1.8738258701369675e-05, "loss": 2.4579, "step": 366100 }, { "epoch": 9.38, "learning_rate": 1.8729719575093077e-05, "loss": 2.5, "step": 366200 }, { "epoch": 9.38, "learning_rate": 1.872118044881648e-05, "loss": 2.3192, "step": 366300 }, { "epoch": 9.39, "learning_rate": 1.8712641322539877e-05, "loss": 2.5531, "step": 366400 }, { "epoch": 9.39, "learning_rate": 1.8704102196263278e-05, "loss": 2.5078, "step": 366500 }, { "epoch": 9.39, "learning_rate": 1.869556306998668e-05, "loss": 2.3509, "step": 366600 }, { "epoch": 9.39, "learning_rate": 1.868702394371008e-05, "loss": 2.4027, "step": 366700 }, { "epoch": 9.4, "learning_rate": 1.867848481743348e-05, "loss": 2.2721, "step": 366800 }, { "epoch": 9.4, "learning_rate": 1.866994569115688e-05, "loss": 2.3922, "step": 366900 }, { "epoch": 9.4, "learning_rate": 1.8661406564880283e-05, "loss": 2.4485, "step": 367000 }, { "epoch": 9.4, "learning_rate": 1.8652867438603684e-05, "loss": 2.5269, "step": 367100 }, { "epoch": 9.41, "learning_rate": 1.8644328312327082e-05, "loss": 2.4858, "step": 367200 }, { "epoch": 9.41, "learning_rate": 1.8635789186050484e-05, "loss": 2.5382, "step": 367300 }, { "epoch": 9.41, "learning_rate": 1.8627250059773885e-05, "loss": 2.3414, "step": 367400 }, { "epoch": 9.41, "learning_rate": 1.8618710933497287e-05, "loss": 2.4237, "step": 367500 }, { "epoch": 9.42, "learning_rate": 1.8610171807220685e-05, "loss": 2.6329, "step": 367600 }, { "epoch": 9.42, "learning_rate": 1.8601632680944087e-05, "loss": 2.3772, "step": 367700 }, { "epoch": 9.42, "learning_rate": 1.8593093554667488e-05, "loss": 2.3128, "step": 367800 }, { "epoch": 9.42, "learning_rate": 1.8584554428390886e-05, "loss": 2.4733, "step": 367900 }, { "epoch": 9.43, "learning_rate": 1.8576015302114288e-05, "loss": 2.4864, "step": 368000 }, { "epoch": 9.43, "learning_rate": 1.856747617583769e-05, "loss": 2.6485, "step": 368100 }, { "epoch": 9.43, "learning_rate": 1.855893704956109e-05, "loss": 2.4724, "step": 368200 }, { "epoch": 9.43, "learning_rate": 1.855039792328449e-05, "loss": 2.4789, "step": 368300 }, { "epoch": 9.44, "learning_rate": 1.854185879700789e-05, "loss": 2.5142, "step": 368400 }, { "epoch": 9.44, "learning_rate": 1.8533319670731292e-05, "loss": 2.5463, "step": 368500 }, { "epoch": 9.44, "learning_rate": 1.8524780544454694e-05, "loss": 2.3217, "step": 368600 }, { "epoch": 9.45, "learning_rate": 1.8516241418178092e-05, "loss": 2.384, "step": 368700 }, { "epoch": 9.45, "learning_rate": 1.8507702291901494e-05, "loss": 2.4266, "step": 368800 }, { "epoch": 9.45, "learning_rate": 1.8499163165624895e-05, "loss": 2.4248, "step": 368900 }, { "epoch": 9.45, "learning_rate": 1.8490624039348293e-05, "loss": 2.5011, "step": 369000 }, { "epoch": 9.46, "learning_rate": 1.8482084913071695e-05, "loss": 2.3596, "step": 369100 }, { "epoch": 9.46, "learning_rate": 1.8473545786795096e-05, "loss": 2.5138, "step": 369200 }, { "epoch": 9.46, "learning_rate": 1.8465006660518498e-05, "loss": 2.5326, "step": 369300 }, { "epoch": 9.46, "learning_rate": 1.8456467534241896e-05, "loss": 2.7676, "step": 369400 }, { "epoch": 9.47, "learning_rate": 1.8447928407965298e-05, "loss": 2.5415, "step": 369500 }, { "epoch": 9.47, "learning_rate": 1.84393892816887e-05, "loss": 2.4893, "step": 369600 }, { "epoch": 9.47, "learning_rate": 1.84308501554121e-05, "loss": 2.4449, "step": 369700 }, { "epoch": 9.47, "learning_rate": 1.84223110291355e-05, "loss": 2.438, "step": 369800 }, { "epoch": 9.48, "learning_rate": 1.84137719028589e-05, "loss": 2.5547, "step": 369900 }, { "epoch": 9.48, "learning_rate": 1.8405232776582302e-05, "loss": 2.5122, "step": 370000 }, { "epoch": 9.48, "learning_rate": 1.8396693650305704e-05, "loss": 2.403, "step": 370100 }, { "epoch": 9.48, "learning_rate": 1.83881545240291e-05, "loss": 2.5822, "step": 370200 }, { "epoch": 9.49, "learning_rate": 1.8379615397752503e-05, "loss": 2.2563, "step": 370300 }, { "epoch": 9.49, "learning_rate": 1.8371076271475905e-05, "loss": 2.4158, "step": 370400 }, { "epoch": 9.49, "learning_rate": 1.8362537145199303e-05, "loss": 2.4103, "step": 370500 }, { "epoch": 9.49, "learning_rate": 1.8353998018922704e-05, "loss": 2.3934, "step": 370600 }, { "epoch": 9.5, "learning_rate": 1.8345458892646106e-05, "loss": 2.6031, "step": 370700 }, { "epoch": 9.5, "learning_rate": 1.8336919766369508e-05, "loss": 2.5486, "step": 370800 }, { "epoch": 9.5, "learning_rate": 1.8328380640092906e-05, "loss": 2.3811, "step": 370900 }, { "epoch": 9.5, "learning_rate": 1.8319841513816307e-05, "loss": 2.5578, "step": 371000 }, { "epoch": 9.51, "learning_rate": 1.831130238753971e-05, "loss": 2.3826, "step": 371100 }, { "epoch": 9.51, "learning_rate": 1.830276326126311e-05, "loss": 2.5775, "step": 371200 }, { "epoch": 9.51, "learning_rate": 1.829422413498651e-05, "loss": 2.6319, "step": 371300 }, { "epoch": 9.51, "learning_rate": 1.828568500870991e-05, "loss": 2.2904, "step": 371400 }, { "epoch": 9.52, "learning_rate": 1.827714588243331e-05, "loss": 2.4549, "step": 371500 }, { "epoch": 9.52, "learning_rate": 1.826860675615671e-05, "loss": 2.612, "step": 371600 }, { "epoch": 9.52, "learning_rate": 1.826006762988011e-05, "loss": 2.444, "step": 371700 }, { "epoch": 9.52, "learning_rate": 1.8251528503603513e-05, "loss": 2.4744, "step": 371800 }, { "epoch": 9.53, "learning_rate": 1.8242989377326914e-05, "loss": 2.5093, "step": 371900 }, { "epoch": 9.53, "learning_rate": 1.8234450251050313e-05, "loss": 2.3673, "step": 372000 }, { "epoch": 9.53, "learning_rate": 1.8225911124773714e-05, "loss": 2.3785, "step": 372100 }, { "epoch": 9.53, "learning_rate": 1.8217371998497116e-05, "loss": 2.5666, "step": 372200 }, { "epoch": 9.54, "learning_rate": 1.8208832872220517e-05, "loss": 2.251, "step": 372300 }, { "epoch": 9.54, "learning_rate": 1.8200293745943915e-05, "loss": 2.5145, "step": 372400 }, { "epoch": 9.54, "learning_rate": 1.8191754619667317e-05, "loss": 2.4607, "step": 372500 }, { "epoch": 9.55, "learning_rate": 1.818321549339072e-05, "loss": 2.5412, "step": 372600 }, { "epoch": 9.55, "learning_rate": 1.817467636711412e-05, "loss": 2.4565, "step": 372700 }, { "epoch": 9.55, "learning_rate": 1.8166137240837518e-05, "loss": 2.4761, "step": 372800 }, { "epoch": 9.55, "learning_rate": 1.815759811456092e-05, "loss": 2.4169, "step": 372900 }, { "epoch": 9.56, "learning_rate": 1.814905898828432e-05, "loss": 2.298, "step": 373000 }, { "epoch": 9.56, "learning_rate": 1.814051986200772e-05, "loss": 2.4365, "step": 373100 }, { "epoch": 9.56, "learning_rate": 1.813198073573112e-05, "loss": 2.5362, "step": 373200 }, { "epoch": 9.56, "learning_rate": 1.8123441609454523e-05, "loss": 2.4336, "step": 373300 }, { "epoch": 9.57, "learning_rate": 1.8114902483177924e-05, "loss": 2.2315, "step": 373400 }, { "epoch": 9.57, "learning_rate": 1.8106363356901322e-05, "loss": 2.304, "step": 373500 }, { "epoch": 9.57, "learning_rate": 1.8097824230624724e-05, "loss": 2.444, "step": 373600 }, { "epoch": 9.57, "learning_rate": 1.8089285104348125e-05, "loss": 2.4737, "step": 373700 }, { "epoch": 9.58, "learning_rate": 1.8080745978071527e-05, "loss": 2.4807, "step": 373800 }, { "epoch": 9.58, "learning_rate": 1.8072206851794925e-05, "loss": 2.4411, "step": 373900 }, { "epoch": 9.58, "learning_rate": 1.8063667725518327e-05, "loss": 2.4845, "step": 374000 }, { "epoch": 9.58, "learning_rate": 1.8055128599241728e-05, "loss": 2.5516, "step": 374100 }, { "epoch": 9.59, "learning_rate": 1.8046589472965126e-05, "loss": 2.4418, "step": 374200 }, { "epoch": 9.59, "learning_rate": 1.8038050346688528e-05, "loss": 2.4746, "step": 374300 }, { "epoch": 9.59, "learning_rate": 1.802951122041193e-05, "loss": 2.2424, "step": 374400 }, { "epoch": 9.59, "learning_rate": 1.802097209413533e-05, "loss": 2.537, "step": 374500 }, { "epoch": 9.6, "learning_rate": 1.801243296785873e-05, "loss": 2.4697, "step": 374600 }, { "epoch": 9.6, "learning_rate": 1.800389384158213e-05, "loss": 2.4984, "step": 374700 }, { "epoch": 9.6, "learning_rate": 1.7995354715305532e-05, "loss": 2.6161, "step": 374800 }, { "epoch": 9.6, "learning_rate": 1.7986815589028934e-05, "loss": 2.4842, "step": 374900 }, { "epoch": 9.61, "learning_rate": 1.7978276462752332e-05, "loss": 2.2586, "step": 375000 }, { "epoch": 9.61, "learning_rate": 1.7969737336475734e-05, "loss": 2.5688, "step": 375100 }, { "epoch": 9.61, "learning_rate": 1.7961198210199135e-05, "loss": 2.3439, "step": 375200 }, { "epoch": 9.61, "learning_rate": 1.7952659083922537e-05, "loss": 2.4299, "step": 375300 }, { "epoch": 9.62, "learning_rate": 1.7944119957645935e-05, "loss": 2.6774, "step": 375400 }, { "epoch": 9.62, "learning_rate": 1.7935580831369336e-05, "loss": 2.4918, "step": 375500 }, { "epoch": 9.62, "learning_rate": 1.7927041705092738e-05, "loss": 2.3825, "step": 375600 }, { "epoch": 9.62, "learning_rate": 1.7918502578816136e-05, "loss": 2.4744, "step": 375700 }, { "epoch": 9.63, "learning_rate": 1.7909963452539538e-05, "loss": 2.4258, "step": 375800 }, { "epoch": 9.63, "learning_rate": 1.790142432626294e-05, "loss": 2.5512, "step": 375900 }, { "epoch": 9.63, "learning_rate": 1.789288519998634e-05, "loss": 2.3063, "step": 376000 }, { "epoch": 9.63, "learning_rate": 1.788434607370974e-05, "loss": 2.4228, "step": 376100 }, { "epoch": 9.64, "learning_rate": 1.787580694743314e-05, "loss": 2.409, "step": 376200 }, { "epoch": 9.64, "learning_rate": 1.7867267821156542e-05, "loss": 2.4866, "step": 376300 }, { "epoch": 9.64, "learning_rate": 1.7858728694879943e-05, "loss": 2.4487, "step": 376400 }, { "epoch": 9.64, "learning_rate": 1.785018956860334e-05, "loss": 2.3571, "step": 376500 }, { "epoch": 9.65, "learning_rate": 1.7841650442326743e-05, "loss": 2.6049, "step": 376600 }, { "epoch": 9.65, "learning_rate": 1.7833111316050145e-05, "loss": 2.4646, "step": 376700 }, { "epoch": 9.65, "learning_rate": 1.7824572189773546e-05, "loss": 2.3863, "step": 376800 }, { "epoch": 9.66, "learning_rate": 1.7816033063496944e-05, "loss": 2.4522, "step": 376900 }, { "epoch": 9.66, "learning_rate": 1.7807493937220343e-05, "loss": 2.4233, "step": 377000 }, { "epoch": 9.66, "learning_rate": 1.7798954810943744e-05, "loss": 2.5585, "step": 377100 }, { "epoch": 9.66, "learning_rate": 1.7790415684667146e-05, "loss": 2.6039, "step": 377200 }, { "epoch": 9.67, "learning_rate": 1.7781876558390544e-05, "loss": 2.502, "step": 377300 }, { "epoch": 9.67, "learning_rate": 1.7773337432113945e-05, "loss": 2.5336, "step": 377400 }, { "epoch": 9.67, "learning_rate": 1.7764798305837347e-05, "loss": 2.4525, "step": 377500 }, { "epoch": 9.67, "learning_rate": 1.775625917956075e-05, "loss": 2.5984, "step": 377600 }, { "epoch": 9.68, "learning_rate": 1.7747720053284147e-05, "loss": 2.4445, "step": 377700 }, { "epoch": 9.68, "learning_rate": 1.7739180927007548e-05, "loss": 2.2989, "step": 377800 }, { "epoch": 9.68, "learning_rate": 1.773064180073095e-05, "loss": 2.4097, "step": 377900 }, { "epoch": 9.68, "learning_rate": 1.7722102674454348e-05, "loss": 2.4614, "step": 378000 }, { "epoch": 9.69, "learning_rate": 1.771356354817775e-05, "loss": 2.4558, "step": 378100 }, { "epoch": 9.69, "learning_rate": 1.770502442190115e-05, "loss": 2.4049, "step": 378200 }, { "epoch": 9.69, "learning_rate": 1.7696485295624553e-05, "loss": 2.5365, "step": 378300 }, { "epoch": 9.69, "learning_rate": 1.768794616934795e-05, "loss": 2.5574, "step": 378400 }, { "epoch": 9.7, "learning_rate": 1.7679407043071352e-05, "loss": 2.5316, "step": 378500 }, { "epoch": 9.7, "learning_rate": 1.7670867916794754e-05, "loss": 2.5792, "step": 378600 }, { "epoch": 9.7, "learning_rate": 1.7662328790518155e-05, "loss": 2.372, "step": 378700 }, { "epoch": 9.7, "learning_rate": 1.7653789664241554e-05, "loss": 2.5235, "step": 378800 }, { "epoch": 9.71, "learning_rate": 1.7645250537964955e-05, "loss": 2.5767, "step": 378900 }, { "epoch": 9.71, "learning_rate": 1.7636711411688357e-05, "loss": 2.4736, "step": 379000 }, { "epoch": 9.71, "learning_rate": 1.7628172285411755e-05, "loss": 2.4114, "step": 379100 }, { "epoch": 9.71, "learning_rate": 1.7619633159135156e-05, "loss": 2.3932, "step": 379200 }, { "epoch": 9.72, "learning_rate": 1.7611094032858558e-05, "loss": 2.5267, "step": 379300 }, { "epoch": 9.72, "learning_rate": 1.760255490658196e-05, "loss": 2.5374, "step": 379400 }, { "epoch": 9.72, "learning_rate": 1.7594015780305358e-05, "loss": 2.4665, "step": 379500 }, { "epoch": 9.72, "learning_rate": 1.758547665402876e-05, "loss": 2.491, "step": 379600 }, { "epoch": 9.73, "learning_rate": 1.757693752775216e-05, "loss": 2.5024, "step": 379700 }, { "epoch": 9.73, "learning_rate": 1.7568398401475562e-05, "loss": 2.5219, "step": 379800 }, { "epoch": 9.73, "learning_rate": 1.755985927519896e-05, "loss": 2.4444, "step": 379900 }, { "epoch": 9.73, "learning_rate": 1.7551320148922362e-05, "loss": 2.4486, "step": 380000 }, { "epoch": 9.74, "learning_rate": 1.7542781022645763e-05, "loss": 2.4555, "step": 380100 }, { "epoch": 9.74, "learning_rate": 1.7534241896369165e-05, "loss": 2.4537, "step": 380200 }, { "epoch": 9.74, "learning_rate": 1.7525702770092563e-05, "loss": 2.4673, "step": 380300 }, { "epoch": 9.74, "learning_rate": 1.7517163643815965e-05, "loss": 2.5531, "step": 380400 }, { "epoch": 9.75, "learning_rate": 1.7508624517539366e-05, "loss": 2.5371, "step": 380500 }, { "epoch": 9.75, "learning_rate": 1.7500085391262764e-05, "loss": 2.692, "step": 380600 }, { "epoch": 9.75, "learning_rate": 1.7491546264986166e-05, "loss": 2.2664, "step": 380700 }, { "epoch": 9.76, "learning_rate": 1.7483007138709568e-05, "loss": 2.4715, "step": 380800 }, { "epoch": 9.76, "learning_rate": 1.747446801243297e-05, "loss": 2.4072, "step": 380900 }, { "epoch": 9.76, "learning_rate": 1.7465928886156367e-05, "loss": 2.3953, "step": 381000 }, { "epoch": 9.76, "learning_rate": 1.745738975987977e-05, "loss": 2.3845, "step": 381100 }, { "epoch": 9.77, "learning_rate": 1.744885063360317e-05, "loss": 2.4376, "step": 381200 }, { "epoch": 9.77, "learning_rate": 1.7440311507326572e-05, "loss": 2.5648, "step": 381300 }, { "epoch": 9.77, "learning_rate": 1.743177238104997e-05, "loss": 2.4225, "step": 381400 }, { "epoch": 9.77, "learning_rate": 1.742323325477337e-05, "loss": 2.2958, "step": 381500 }, { "epoch": 9.78, "learning_rate": 1.7414694128496773e-05, "loss": 2.5642, "step": 381600 }, { "epoch": 9.78, "learning_rate": 1.7406155002220175e-05, "loss": 2.4446, "step": 381700 }, { "epoch": 9.78, "learning_rate": 1.7397615875943573e-05, "loss": 2.4259, "step": 381800 }, { "epoch": 9.78, "learning_rate": 1.7389076749666974e-05, "loss": 2.4643, "step": 381900 }, { "epoch": 9.79, "learning_rate": 1.7380537623390376e-05, "loss": 2.5778, "step": 382000 }, { "epoch": 9.79, "learning_rate": 1.7371998497113774e-05, "loss": 2.3056, "step": 382100 }, { "epoch": 9.79, "learning_rate": 1.7363459370837176e-05, "loss": 2.4528, "step": 382200 }, { "epoch": 9.79, "learning_rate": 1.7354920244560577e-05, "loss": 2.5293, "step": 382300 }, { "epoch": 9.8, "learning_rate": 1.734638111828398e-05, "loss": 2.552, "step": 382400 }, { "epoch": 9.8, "learning_rate": 1.7337841992007377e-05, "loss": 2.4343, "step": 382500 }, { "epoch": 9.8, "learning_rate": 1.732930286573078e-05, "loss": 2.4091, "step": 382600 }, { "epoch": 9.8, "learning_rate": 1.732076373945418e-05, "loss": 2.5018, "step": 382700 }, { "epoch": 9.81, "learning_rate": 1.731222461317758e-05, "loss": 2.4653, "step": 382800 }, { "epoch": 9.81, "learning_rate": 1.730368548690098e-05, "loss": 2.4213, "step": 382900 }, { "epoch": 9.81, "learning_rate": 1.729514636062438e-05, "loss": 2.5149, "step": 383000 }, { "epoch": 9.81, "learning_rate": 1.7286607234347783e-05, "loss": 2.3389, "step": 383100 }, { "epoch": 9.82, "learning_rate": 1.727806810807118e-05, "loss": 2.2932, "step": 383200 }, { "epoch": 9.82, "learning_rate": 1.7269528981794583e-05, "loss": 2.425, "step": 383300 }, { "epoch": 9.82, "learning_rate": 1.7260989855517984e-05, "loss": 2.5606, "step": 383400 }, { "epoch": 9.82, "learning_rate": 1.7252450729241386e-05, "loss": 2.2893, "step": 383500 }, { "epoch": 9.83, "learning_rate": 1.7243911602964784e-05, "loss": 2.6422, "step": 383600 }, { "epoch": 9.83, "learning_rate": 1.7235372476688185e-05, "loss": 2.4667, "step": 383700 }, { "epoch": 9.83, "learning_rate": 1.7226833350411587e-05, "loss": 2.4505, "step": 383800 }, { "epoch": 9.83, "learning_rate": 1.721829422413499e-05, "loss": 2.4373, "step": 383900 }, { "epoch": 9.84, "learning_rate": 1.7209755097858387e-05, "loss": 2.5523, "step": 384000 }, { "epoch": 9.84, "learning_rate": 1.7201215971581788e-05, "loss": 2.5825, "step": 384100 }, { "epoch": 9.84, "learning_rate": 1.719267684530519e-05, "loss": 2.2537, "step": 384200 }, { "epoch": 9.84, "learning_rate": 1.718413771902859e-05, "loss": 2.592, "step": 384300 }, { "epoch": 9.85, "learning_rate": 1.717559859275199e-05, "loss": 2.391, "step": 384400 }, { "epoch": 9.85, "learning_rate": 1.716705946647539e-05, "loss": 2.5364, "step": 384500 }, { "epoch": 9.85, "learning_rate": 1.7158520340198792e-05, "loss": 2.3426, "step": 384600 }, { "epoch": 9.86, "learning_rate": 1.714998121392219e-05, "loss": 2.6583, "step": 384700 }, { "epoch": 9.86, "learning_rate": 1.7141442087645592e-05, "loss": 2.4238, "step": 384800 }, { "epoch": 9.86, "learning_rate": 1.7132902961368994e-05, "loss": 2.2801, "step": 384900 }, { "epoch": 9.86, "learning_rate": 1.7124363835092395e-05, "loss": 2.495, "step": 385000 }, { "epoch": 9.87, "learning_rate": 1.7115824708815793e-05, "loss": 2.4237, "step": 385100 }, { "epoch": 9.87, "learning_rate": 1.7107285582539195e-05, "loss": 2.4236, "step": 385200 }, { "epoch": 9.87, "learning_rate": 1.7098746456262597e-05, "loss": 2.2743, "step": 385300 }, { "epoch": 9.87, "learning_rate": 1.7090207329985998e-05, "loss": 2.4369, "step": 385400 }, { "epoch": 9.88, "learning_rate": 1.7081668203709396e-05, "loss": 2.568, "step": 385500 }, { "epoch": 9.88, "learning_rate": 1.7073129077432798e-05, "loss": 2.3912, "step": 385600 }, { "epoch": 9.88, "learning_rate": 1.70645899511562e-05, "loss": 2.4135, "step": 385700 }, { "epoch": 9.88, "learning_rate": 1.7056050824879598e-05, "loss": 2.3469, "step": 385800 }, { "epoch": 9.89, "learning_rate": 1.7047511698603e-05, "loss": 2.5563, "step": 385900 }, { "epoch": 9.89, "learning_rate": 1.70389725723264e-05, "loss": 2.456, "step": 386000 }, { "epoch": 9.89, "learning_rate": 1.7030433446049802e-05, "loss": 2.4716, "step": 386100 }, { "epoch": 9.89, "learning_rate": 1.70218943197732e-05, "loss": 2.4082, "step": 386200 }, { "epoch": 9.9, "learning_rate": 1.7013355193496602e-05, "loss": 2.5351, "step": 386300 }, { "epoch": 9.9, "learning_rate": 1.7004816067220003e-05, "loss": 2.4178, "step": 386400 }, { "epoch": 9.9, "learning_rate": 1.6996276940943405e-05, "loss": 2.474, "step": 386500 }, { "epoch": 9.9, "learning_rate": 1.6987737814666803e-05, "loss": 2.4404, "step": 386600 }, { "epoch": 9.91, "learning_rate": 1.6979198688390205e-05, "loss": 2.4461, "step": 386700 }, { "epoch": 9.91, "learning_rate": 1.6970659562113606e-05, "loss": 2.5338, "step": 386800 }, { "epoch": 9.91, "learning_rate": 1.6962120435837008e-05, "loss": 2.4256, "step": 386900 }, { "epoch": 9.91, "learning_rate": 1.6953581309560406e-05, "loss": 2.5794, "step": 387000 }, { "epoch": 9.92, "learning_rate": 1.6945042183283807e-05, "loss": 2.3708, "step": 387100 }, { "epoch": 9.92, "learning_rate": 1.693650305700721e-05, "loss": 2.5113, "step": 387200 }, { "epoch": 9.92, "learning_rate": 1.6927963930730607e-05, "loss": 2.3322, "step": 387300 }, { "epoch": 9.92, "learning_rate": 1.691942480445401e-05, "loss": 2.4048, "step": 387400 }, { "epoch": 9.93, "learning_rate": 1.691088567817741e-05, "loss": 2.4437, "step": 387500 }, { "epoch": 9.93, "learning_rate": 1.6902346551900812e-05, "loss": 2.4859, "step": 387600 }, { "epoch": 9.93, "learning_rate": 1.689380742562421e-05, "loss": 2.516, "step": 387700 }, { "epoch": 9.93, "learning_rate": 1.688526829934761e-05, "loss": 2.5643, "step": 387800 }, { "epoch": 9.94, "learning_rate": 1.6876729173071013e-05, "loss": 2.4569, "step": 387900 }, { "epoch": 9.94, "learning_rate": 1.6868190046794415e-05, "loss": 2.5162, "step": 388000 }, { "epoch": 9.94, "learning_rate": 1.6859650920517813e-05, "loss": 2.3696, "step": 388100 }, { "epoch": 9.94, "learning_rate": 1.6851111794241214e-05, "loss": 2.4488, "step": 388200 }, { "epoch": 9.95, "learning_rate": 1.6842572667964616e-05, "loss": 2.5093, "step": 388300 }, { "epoch": 9.95, "learning_rate": 1.6834033541688014e-05, "loss": 2.4386, "step": 388400 }, { "epoch": 9.95, "learning_rate": 1.6825494415411416e-05, "loss": 2.3979, "step": 388500 }, { "epoch": 9.95, "learning_rate": 1.6816955289134817e-05, "loss": 2.6886, "step": 388600 }, { "epoch": 9.96, "learning_rate": 1.680841616285822e-05, "loss": 2.5377, "step": 388700 }, { "epoch": 9.96, "learning_rate": 1.6799877036581617e-05, "loss": 2.5474, "step": 388800 }, { "epoch": 9.96, "learning_rate": 1.679133791030502e-05, "loss": 2.562, "step": 388900 }, { "epoch": 9.97, "learning_rate": 1.678279878402842e-05, "loss": 2.4386, "step": 389000 }, { "epoch": 9.97, "learning_rate": 1.677425965775182e-05, "loss": 2.6628, "step": 389100 }, { "epoch": 9.97, "learning_rate": 1.676572053147522e-05, "loss": 2.381, "step": 389200 }, { "epoch": 9.97, "learning_rate": 1.675718140519862e-05, "loss": 2.5905, "step": 389300 }, { "epoch": 9.98, "learning_rate": 1.6748642278922023e-05, "loss": 2.2932, "step": 389400 }, { "epoch": 9.98, "learning_rate": 1.6740103152645424e-05, "loss": 2.3794, "step": 389500 }, { "epoch": 9.98, "learning_rate": 1.6731564026368822e-05, "loss": 2.4643, "step": 389600 }, { "epoch": 9.98, "learning_rate": 1.6723024900092224e-05, "loss": 2.3464, "step": 389700 }, { "epoch": 9.99, "learning_rate": 1.6714485773815626e-05, "loss": 2.4667, "step": 389800 }, { "epoch": 9.99, "learning_rate": 1.6705946647539024e-05, "loss": 2.4188, "step": 389900 }, { "epoch": 9.99, "learning_rate": 1.6697407521262425e-05, "loss": 2.273, "step": 390000 }, { "epoch": 9.99, "learning_rate": 1.6688868394985827e-05, "loss": 2.517, "step": 390100 }, { "epoch": 10.0, "learning_rate": 1.668032926870923e-05, "loss": 2.405, "step": 390200 }, { "epoch": 10.0, "learning_rate": 1.6671790142432627e-05, "loss": 2.4721, "step": 390300 }, { "epoch": 10.0, "learning_rate": 1.6663251016156028e-05, "loss": 2.3936, "step": 390400 }, { "epoch": 10.0, "learning_rate": 1.665471188987943e-05, "loss": 2.3981, "step": 390500 }, { "epoch": 10.01, "learning_rate": 1.664617276360283e-05, "loss": 2.5434, "step": 390600 }, { "epoch": 10.01, "learning_rate": 1.663763363732623e-05, "loss": 2.4544, "step": 390700 }, { "epoch": 10.01, "learning_rate": 1.662909451104963e-05, "loss": 2.3425, "step": 390800 }, { "epoch": 10.01, "learning_rate": 1.6620555384773032e-05, "loss": 2.4189, "step": 390900 }, { "epoch": 10.02, "learning_rate": 1.661201625849643e-05, "loss": 2.4276, "step": 391000 }, { "epoch": 10.02, "learning_rate": 1.6603477132219832e-05, "loss": 2.3498, "step": 391100 }, { "epoch": 10.02, "learning_rate": 1.6594938005943234e-05, "loss": 2.3557, "step": 391200 }, { "epoch": 10.02, "learning_rate": 1.6586398879666635e-05, "loss": 2.4556, "step": 391300 }, { "epoch": 10.03, "learning_rate": 1.6577859753390033e-05, "loss": 2.477, "step": 391400 }, { "epoch": 10.03, "learning_rate": 1.6569320627113435e-05, "loss": 2.3401, "step": 391500 }, { "epoch": 10.03, "learning_rate": 1.6560781500836836e-05, "loss": 2.5818, "step": 391600 }, { "epoch": 10.03, "learning_rate": 1.6552242374560238e-05, "loss": 2.3227, "step": 391700 }, { "epoch": 10.04, "learning_rate": 1.6543703248283636e-05, "loss": 2.5602, "step": 391800 }, { "epoch": 10.04, "learning_rate": 1.6535164122007038e-05, "loss": 2.6084, "step": 391900 }, { "epoch": 10.04, "learning_rate": 1.652662499573044e-05, "loss": 2.7121, "step": 392000 }, { "epoch": 10.04, "learning_rate": 1.651808586945384e-05, "loss": 2.3617, "step": 392100 }, { "epoch": 10.05, "learning_rate": 1.650954674317724e-05, "loss": 2.3958, "step": 392200 }, { "epoch": 10.05, "learning_rate": 1.650100761690064e-05, "loss": 2.3304, "step": 392300 }, { "epoch": 10.05, "learning_rate": 1.6492468490624042e-05, "loss": 2.2532, "step": 392400 }, { "epoch": 10.05, "learning_rate": 1.648392936434744e-05, "loss": 2.3591, "step": 392500 }, { "epoch": 10.06, "learning_rate": 1.6475390238070842e-05, "loss": 2.3205, "step": 392600 }, { "epoch": 10.06, "learning_rate": 1.6466851111794243e-05, "loss": 2.504, "step": 392700 }, { "epoch": 10.06, "learning_rate": 1.6458311985517645e-05, "loss": 2.4209, "step": 392800 }, { "epoch": 10.07, "learning_rate": 1.6449772859241043e-05, "loss": 2.4739, "step": 392900 }, { "epoch": 10.07, "learning_rate": 1.6441233732964445e-05, "loss": 2.3814, "step": 393000 }, { "epoch": 10.07, "learning_rate": 1.6432694606687846e-05, "loss": 2.5431, "step": 393100 }, { "epoch": 10.07, "learning_rate": 1.6424155480411248e-05, "loss": 2.2433, "step": 393200 }, { "epoch": 10.08, "learning_rate": 1.6415616354134646e-05, "loss": 2.3893, "step": 393300 }, { "epoch": 10.08, "learning_rate": 1.6407077227858047e-05, "loss": 2.3591, "step": 393400 }, { "epoch": 10.08, "learning_rate": 1.639853810158145e-05, "loss": 2.3984, "step": 393500 }, { "epoch": 10.08, "learning_rate": 1.638999897530485e-05, "loss": 2.3859, "step": 393600 }, { "epoch": 10.09, "learning_rate": 1.638145984902825e-05, "loss": 2.4652, "step": 393700 }, { "epoch": 10.09, "learning_rate": 1.637292072275165e-05, "loss": 2.5129, "step": 393800 }, { "epoch": 10.09, "learning_rate": 1.6364381596475052e-05, "loss": 2.414, "step": 393900 }, { "epoch": 10.09, "learning_rate": 1.635584247019845e-05, "loss": 2.4516, "step": 394000 }, { "epoch": 10.1, "learning_rate": 1.634730334392185e-05, "loss": 2.4073, "step": 394100 }, { "epoch": 10.1, "learning_rate": 1.6338764217645253e-05, "loss": 2.39, "step": 394200 }, { "epoch": 10.1, "learning_rate": 1.6330225091368655e-05, "loss": 2.3152, "step": 394300 }, { "epoch": 10.1, "learning_rate": 1.6321685965092053e-05, "loss": 2.5527, "step": 394400 }, { "epoch": 10.11, "learning_rate": 1.631314683881545e-05, "loss": 2.3535, "step": 394500 }, { "epoch": 10.11, "learning_rate": 1.6304607712538852e-05, "loss": 2.4134, "step": 394600 }, { "epoch": 10.11, "learning_rate": 1.6296068586262254e-05, "loss": 2.4877, "step": 394700 }, { "epoch": 10.11, "learning_rate": 1.6287529459985652e-05, "loss": 2.449, "step": 394800 }, { "epoch": 10.12, "learning_rate": 1.6278990333709054e-05, "loss": 2.4072, "step": 394900 }, { "epoch": 10.12, "learning_rate": 1.6270451207432455e-05, "loss": 2.4447, "step": 395000 }, { "epoch": 10.12, "learning_rate": 1.6261912081155857e-05, "loss": 2.3707, "step": 395100 }, { "epoch": 10.12, "learning_rate": 1.6253372954879255e-05, "loss": 2.3866, "step": 395200 }, { "epoch": 10.13, "learning_rate": 1.6244833828602657e-05, "loss": 2.4695, "step": 395300 }, { "epoch": 10.13, "learning_rate": 1.6236294702326058e-05, "loss": 2.3628, "step": 395400 }, { "epoch": 10.13, "learning_rate": 1.622775557604946e-05, "loss": 2.4668, "step": 395500 }, { "epoch": 10.13, "learning_rate": 1.6219216449772858e-05, "loss": 2.4534, "step": 395600 }, { "epoch": 10.14, "learning_rate": 1.621067732349626e-05, "loss": 2.3748, "step": 395700 }, { "epoch": 10.14, "learning_rate": 1.620213819721966e-05, "loss": 2.3559, "step": 395800 }, { "epoch": 10.14, "learning_rate": 1.6193599070943062e-05, "loss": 2.4506, "step": 395900 }, { "epoch": 10.14, "learning_rate": 1.618505994466646e-05, "loss": 2.1887, "step": 396000 }, { "epoch": 10.15, "learning_rate": 1.6176520818389862e-05, "loss": 2.3987, "step": 396100 }, { "epoch": 10.15, "learning_rate": 1.6167981692113264e-05, "loss": 2.2123, "step": 396200 }, { "epoch": 10.15, "learning_rate": 1.6159442565836662e-05, "loss": 2.4118, "step": 396300 }, { "epoch": 10.15, "learning_rate": 1.6150903439560063e-05, "loss": 2.4012, "step": 396400 }, { "epoch": 10.16, "learning_rate": 1.6142364313283465e-05, "loss": 2.4437, "step": 396500 }, { "epoch": 10.16, "learning_rate": 1.6133825187006866e-05, "loss": 2.4596, "step": 396600 }, { "epoch": 10.16, "learning_rate": 1.6125286060730265e-05, "loss": 2.272, "step": 396700 }, { "epoch": 10.16, "learning_rate": 1.6116746934453666e-05, "loss": 2.4186, "step": 396800 }, { "epoch": 10.17, "learning_rate": 1.6108207808177068e-05, "loss": 2.3864, "step": 396900 }, { "epoch": 10.17, "learning_rate": 1.609966868190047e-05, "loss": 2.5098, "step": 397000 }, { "epoch": 10.17, "learning_rate": 1.6091129555623867e-05, "loss": 2.2682, "step": 397100 }, { "epoch": 10.18, "learning_rate": 1.608259042934727e-05, "loss": 2.4155, "step": 397200 }, { "epoch": 10.18, "learning_rate": 1.607405130307067e-05, "loss": 2.5484, "step": 397300 }, { "epoch": 10.18, "learning_rate": 1.606551217679407e-05, "loss": 2.3424, "step": 397400 }, { "epoch": 10.18, "learning_rate": 1.605697305051747e-05, "loss": 2.439, "step": 397500 }, { "epoch": 10.19, "learning_rate": 1.6048433924240872e-05, "loss": 2.3458, "step": 397600 }, { "epoch": 10.19, "learning_rate": 1.6039894797964273e-05, "loss": 2.4439, "step": 397700 }, { "epoch": 10.19, "learning_rate": 1.603135567168767e-05, "loss": 2.3554, "step": 397800 }, { "epoch": 10.19, "learning_rate": 1.6022816545411073e-05, "loss": 2.4306, "step": 397900 }, { "epoch": 10.2, "learning_rate": 1.6014277419134475e-05, "loss": 2.4042, "step": 398000 }, { "epoch": 10.2, "learning_rate": 1.6005738292857876e-05, "loss": 2.5338, "step": 398100 }, { "epoch": 10.2, "learning_rate": 1.5997199166581274e-05, "loss": 2.452, "step": 398200 }, { "epoch": 10.2, "learning_rate": 1.5988660040304676e-05, "loss": 2.3639, "step": 398300 }, { "epoch": 10.21, "learning_rate": 1.5980120914028077e-05, "loss": 2.3826, "step": 398400 }, { "epoch": 10.21, "learning_rate": 1.597158178775148e-05, "loss": 2.3524, "step": 398500 }, { "epoch": 10.21, "learning_rate": 1.5963042661474877e-05, "loss": 2.5094, "step": 398600 }, { "epoch": 10.21, "learning_rate": 1.595450353519828e-05, "loss": 2.2688, "step": 398700 }, { "epoch": 10.22, "learning_rate": 1.594596440892168e-05, "loss": 2.3228, "step": 398800 }, { "epoch": 10.22, "learning_rate": 1.593742528264508e-05, "loss": 2.366, "step": 398900 }, { "epoch": 10.22, "learning_rate": 1.592888615636848e-05, "loss": 2.4432, "step": 399000 }, { "epoch": 10.22, "learning_rate": 1.592034703009188e-05, "loss": 2.5214, "step": 399100 }, { "epoch": 10.23, "learning_rate": 1.5911807903815283e-05, "loss": 2.4662, "step": 399200 }, { "epoch": 10.23, "learning_rate": 1.590326877753868e-05, "loss": 2.3677, "step": 399300 }, { "epoch": 10.23, "learning_rate": 1.5894729651262083e-05, "loss": 2.3994, "step": 399400 }, { "epoch": 10.23, "learning_rate": 1.5886190524985484e-05, "loss": 2.4339, "step": 399500 }, { "epoch": 10.24, "learning_rate": 1.5877651398708886e-05, "loss": 2.465, "step": 399600 }, { "epoch": 10.24, "learning_rate": 1.5869112272432284e-05, "loss": 2.5653, "step": 399700 }, { "epoch": 10.24, "learning_rate": 1.5860573146155686e-05, "loss": 2.4402, "step": 399800 }, { "epoch": 10.24, "learning_rate": 1.5852034019879087e-05, "loss": 2.4643, "step": 399900 }, { "epoch": 10.25, "learning_rate": 1.5843494893602485e-05, "loss": 2.4224, "step": 400000 }, { "epoch": 10.25, "learning_rate": 1.5834955767325887e-05, "loss": 2.3672, "step": 400100 }, { "epoch": 10.25, "learning_rate": 1.582641664104929e-05, "loss": 2.3876, "step": 400200 }, { "epoch": 10.25, "learning_rate": 1.581787751477269e-05, "loss": 2.2916, "step": 400300 }, { "epoch": 10.26, "learning_rate": 1.5809338388496088e-05, "loss": 2.4448, "step": 400400 }, { "epoch": 10.26, "learning_rate": 1.580079926221949e-05, "loss": 2.4192, "step": 400500 }, { "epoch": 10.26, "learning_rate": 1.579226013594289e-05, "loss": 2.4131, "step": 400600 }, { "epoch": 10.26, "learning_rate": 1.5783721009666293e-05, "loss": 2.4761, "step": 400700 }, { "epoch": 10.27, "learning_rate": 1.577518188338969e-05, "loss": 2.4109, "step": 400800 }, { "epoch": 10.27, "learning_rate": 1.5766642757113092e-05, "loss": 2.3231, "step": 400900 }, { "epoch": 10.27, "learning_rate": 1.5758103630836494e-05, "loss": 2.3753, "step": 401000 }, { "epoch": 10.28, "learning_rate": 1.5749564504559895e-05, "loss": 2.3626, "step": 401100 }, { "epoch": 10.28, "learning_rate": 1.5741025378283294e-05, "loss": 2.4065, "step": 401200 }, { "epoch": 10.28, "learning_rate": 1.5732486252006695e-05, "loss": 2.3983, "step": 401300 }, { "epoch": 10.28, "learning_rate": 1.5723947125730097e-05, "loss": 2.5158, "step": 401400 }, { "epoch": 10.29, "learning_rate": 1.5715407999453495e-05, "loss": 2.4137, "step": 401500 }, { "epoch": 10.29, "learning_rate": 1.5706868873176896e-05, "loss": 2.5449, "step": 401600 }, { "epoch": 10.29, "learning_rate": 1.5698329746900298e-05, "loss": 2.1586, "step": 401700 }, { "epoch": 10.29, "learning_rate": 1.56897906206237e-05, "loss": 2.2384, "step": 401800 }, { "epoch": 10.3, "learning_rate": 1.5681251494347098e-05, "loss": 2.4827, "step": 401900 }, { "epoch": 10.3, "learning_rate": 1.56727123680705e-05, "loss": 2.6007, "step": 402000 }, { "epoch": 10.3, "learning_rate": 1.56641732417939e-05, "loss": 2.5747, "step": 402100 }, { "epoch": 10.3, "learning_rate": 1.5655634115517302e-05, "loss": 2.3761, "step": 402200 }, { "epoch": 10.31, "learning_rate": 1.56470949892407e-05, "loss": 2.3155, "step": 402300 }, { "epoch": 10.31, "learning_rate": 1.5638555862964102e-05, "loss": 2.4255, "step": 402400 }, { "epoch": 10.31, "learning_rate": 1.5630016736687504e-05, "loss": 2.5638, "step": 402500 }, { "epoch": 10.31, "learning_rate": 1.5621477610410902e-05, "loss": 2.4141, "step": 402600 }, { "epoch": 10.32, "learning_rate": 1.5612938484134303e-05, "loss": 2.506, "step": 402700 }, { "epoch": 10.32, "learning_rate": 1.5604399357857705e-05, "loss": 2.3318, "step": 402800 }, { "epoch": 10.32, "learning_rate": 1.5595860231581106e-05, "loss": 2.5507, "step": 402900 }, { "epoch": 10.32, "learning_rate": 1.5587321105304505e-05, "loss": 2.4233, "step": 403000 }, { "epoch": 10.33, "learning_rate": 1.5578781979027906e-05, "loss": 2.5108, "step": 403100 }, { "epoch": 10.33, "learning_rate": 1.5570242852751308e-05, "loss": 2.3689, "step": 403200 }, { "epoch": 10.33, "learning_rate": 1.556170372647471e-05, "loss": 2.2936, "step": 403300 }, { "epoch": 10.33, "learning_rate": 1.5553164600198107e-05, "loss": 2.3457, "step": 403400 }, { "epoch": 10.34, "learning_rate": 1.554462547392151e-05, "loss": 2.336, "step": 403500 }, { "epoch": 10.34, "learning_rate": 1.553608634764491e-05, "loss": 2.5982, "step": 403600 }, { "epoch": 10.34, "learning_rate": 1.5527547221368312e-05, "loss": 2.3622, "step": 403700 }, { "epoch": 10.34, "learning_rate": 1.551900809509171e-05, "loss": 2.5142, "step": 403800 }, { "epoch": 10.35, "learning_rate": 1.5510468968815112e-05, "loss": 2.3936, "step": 403900 }, { "epoch": 10.35, "learning_rate": 1.5501929842538513e-05, "loss": 2.5448, "step": 404000 }, { "epoch": 10.35, "learning_rate": 1.549339071626191e-05, "loss": 2.3943, "step": 404100 }, { "epoch": 10.35, "learning_rate": 1.5484851589985313e-05, "loss": 2.5677, "step": 404200 }, { "epoch": 10.36, "learning_rate": 1.5476312463708715e-05, "loss": 2.4401, "step": 404300 }, { "epoch": 10.36, "learning_rate": 1.5467773337432116e-05, "loss": 2.34, "step": 404400 }, { "epoch": 10.36, "learning_rate": 1.5459234211155514e-05, "loss": 2.3057, "step": 404500 }, { "epoch": 10.36, "learning_rate": 1.5450695084878916e-05, "loss": 2.4371, "step": 404600 }, { "epoch": 10.37, "learning_rate": 1.5442155958602317e-05, "loss": 2.2235, "step": 404700 }, { "epoch": 10.37, "learning_rate": 1.543361683232572e-05, "loss": 2.3583, "step": 404800 }, { "epoch": 10.37, "learning_rate": 1.5425077706049117e-05, "loss": 2.3671, "step": 404900 }, { "epoch": 10.38, "learning_rate": 1.541653857977252e-05, "loss": 2.4501, "step": 405000 }, { "epoch": 10.38, "learning_rate": 1.540799945349592e-05, "loss": 2.4837, "step": 405100 }, { "epoch": 10.38, "learning_rate": 1.539946032721932e-05, "loss": 2.4959, "step": 405200 }, { "epoch": 10.38, "learning_rate": 1.539092120094272e-05, "loss": 2.5588, "step": 405300 }, { "epoch": 10.39, "learning_rate": 1.538238207466612e-05, "loss": 2.388, "step": 405400 }, { "epoch": 10.39, "learning_rate": 1.5373842948389523e-05, "loss": 2.3401, "step": 405500 }, { "epoch": 10.39, "learning_rate": 1.536530382211292e-05, "loss": 2.513, "step": 405600 }, { "epoch": 10.39, "learning_rate": 1.5356764695836323e-05, "loss": 2.3475, "step": 405700 }, { "epoch": 10.4, "learning_rate": 1.5348225569559724e-05, "loss": 2.3806, "step": 405800 }, { "epoch": 10.4, "learning_rate": 1.5339686443283126e-05, "loss": 2.4245, "step": 405900 }, { "epoch": 10.4, "learning_rate": 1.5331147317006524e-05, "loss": 2.3078, "step": 406000 }, { "epoch": 10.4, "learning_rate": 1.5322608190729925e-05, "loss": 2.578, "step": 406100 }, { "epoch": 10.41, "learning_rate": 1.5314069064453327e-05, "loss": 2.3105, "step": 406200 }, { "epoch": 10.41, "learning_rate": 1.530552993817673e-05, "loss": 2.4754, "step": 406300 }, { "epoch": 10.41, "learning_rate": 1.5296990811900127e-05, "loss": 2.4158, "step": 406400 }, { "epoch": 10.41, "learning_rate": 1.5288451685623528e-05, "loss": 2.5898, "step": 406500 }, { "epoch": 10.42, "learning_rate": 1.527991255934693e-05, "loss": 2.578, "step": 406600 }, { "epoch": 10.42, "learning_rate": 1.5271373433070328e-05, "loss": 2.5228, "step": 406700 }, { "epoch": 10.42, "learning_rate": 1.526283430679373e-05, "loss": 2.3272, "step": 406800 }, { "epoch": 10.42, "learning_rate": 1.5254295180517131e-05, "loss": 2.4769, "step": 406900 }, { "epoch": 10.43, "learning_rate": 1.5245756054240531e-05, "loss": 2.5348, "step": 407000 }, { "epoch": 10.43, "learning_rate": 1.5237216927963932e-05, "loss": 2.4132, "step": 407100 }, { "epoch": 10.43, "learning_rate": 1.5228677801687332e-05, "loss": 2.3936, "step": 407200 }, { "epoch": 10.43, "learning_rate": 1.5220138675410734e-05, "loss": 2.4092, "step": 407300 }, { "epoch": 10.44, "learning_rate": 1.5211599549134134e-05, "loss": 2.3868, "step": 407400 }, { "epoch": 10.44, "learning_rate": 1.5203060422857535e-05, "loss": 2.3178, "step": 407500 }, { "epoch": 10.44, "learning_rate": 1.5194521296580935e-05, "loss": 2.5125, "step": 407600 }, { "epoch": 10.44, "learning_rate": 1.5185982170304337e-05, "loss": 2.7254, "step": 407700 }, { "epoch": 10.45, "learning_rate": 1.5177443044027737e-05, "loss": 2.4515, "step": 407800 }, { "epoch": 10.45, "learning_rate": 1.5168903917751136e-05, "loss": 2.4938, "step": 407900 }, { "epoch": 10.45, "learning_rate": 1.5160364791474538e-05, "loss": 2.396, "step": 408000 }, { "epoch": 10.45, "learning_rate": 1.5151825665197938e-05, "loss": 2.3851, "step": 408100 }, { "epoch": 10.46, "learning_rate": 1.514328653892134e-05, "loss": 2.5115, "step": 408200 }, { "epoch": 10.46, "learning_rate": 1.513474741264474e-05, "loss": 2.4618, "step": 408300 }, { "epoch": 10.46, "learning_rate": 1.512620828636814e-05, "loss": 2.343, "step": 408400 }, { "epoch": 10.46, "learning_rate": 1.511766916009154e-05, "loss": 2.4015, "step": 408500 }, { "epoch": 10.47, "learning_rate": 1.5109130033814942e-05, "loss": 2.3877, "step": 408600 }, { "epoch": 10.47, "learning_rate": 1.5100590907538342e-05, "loss": 2.5644, "step": 408700 }, { "epoch": 10.47, "learning_rate": 1.5092051781261744e-05, "loss": 2.4597, "step": 408800 }, { "epoch": 10.47, "learning_rate": 1.5083512654985143e-05, "loss": 2.5105, "step": 408900 }, { "epoch": 10.48, "learning_rate": 1.5074973528708545e-05, "loss": 2.4664, "step": 409000 }, { "epoch": 10.48, "learning_rate": 1.5066434402431945e-05, "loss": 2.5024, "step": 409100 }, { "epoch": 10.48, "learning_rate": 1.5057895276155345e-05, "loss": 2.3704, "step": 409200 }, { "epoch": 10.49, "learning_rate": 1.5049356149878746e-05, "loss": 2.5143, "step": 409300 }, { "epoch": 10.49, "learning_rate": 1.5040817023602146e-05, "loss": 2.3204, "step": 409400 }, { "epoch": 10.49, "learning_rate": 1.5032277897325548e-05, "loss": 2.4267, "step": 409500 }, { "epoch": 10.49, "learning_rate": 1.5023738771048947e-05, "loss": 2.4022, "step": 409600 }, { "epoch": 10.5, "learning_rate": 1.5015199644772349e-05, "loss": 2.4902, "step": 409700 }, { "epoch": 10.5, "learning_rate": 1.5006660518495749e-05, "loss": 2.4062, "step": 409800 }, { "epoch": 10.5, "learning_rate": 1.499812139221915e-05, "loss": 2.3848, "step": 409900 }, { "epoch": 10.5, "learning_rate": 1.498958226594255e-05, "loss": 2.3646, "step": 410000 }, { "epoch": 10.51, "learning_rate": 1.4981043139665952e-05, "loss": 2.4942, "step": 410100 }, { "epoch": 10.51, "learning_rate": 1.4972504013389352e-05, "loss": 2.5237, "step": 410200 }, { "epoch": 10.51, "learning_rate": 1.4963964887112753e-05, "loss": 2.4084, "step": 410300 }, { "epoch": 10.51, "learning_rate": 1.4955425760836153e-05, "loss": 2.4429, "step": 410400 }, { "epoch": 10.52, "learning_rate": 1.4946886634559553e-05, "loss": 2.3814, "step": 410500 }, { "epoch": 10.52, "learning_rate": 1.4938347508282954e-05, "loss": 2.6651, "step": 410600 }, { "epoch": 10.52, "learning_rate": 1.4929808382006354e-05, "loss": 2.3788, "step": 410700 }, { "epoch": 10.52, "learning_rate": 1.4921269255729756e-05, "loss": 2.3782, "step": 410800 }, { "epoch": 10.53, "learning_rate": 1.4912730129453156e-05, "loss": 2.444, "step": 410900 }, { "epoch": 10.53, "learning_rate": 1.4904191003176557e-05, "loss": 2.4786, "step": 411000 }, { "epoch": 10.53, "learning_rate": 1.4895651876899957e-05, "loss": 2.408, "step": 411100 }, { "epoch": 10.53, "learning_rate": 1.4887112750623359e-05, "loss": 2.348, "step": 411200 }, { "epoch": 10.54, "learning_rate": 1.4878573624346759e-05, "loss": 2.4413, "step": 411300 }, { "epoch": 10.54, "learning_rate": 1.487003449807016e-05, "loss": 2.2968, "step": 411400 }, { "epoch": 10.54, "learning_rate": 1.486149537179356e-05, "loss": 2.4226, "step": 411500 }, { "epoch": 10.54, "learning_rate": 1.4852956245516961e-05, "loss": 2.4854, "step": 411600 }, { "epoch": 10.55, "learning_rate": 1.4844417119240361e-05, "loss": 2.3968, "step": 411700 }, { "epoch": 10.55, "learning_rate": 1.4835877992963761e-05, "loss": 2.4726, "step": 411800 }, { "epoch": 10.55, "learning_rate": 1.4827338866687163e-05, "loss": 2.4465, "step": 411900 }, { "epoch": 10.55, "learning_rate": 1.4818799740410561e-05, "loss": 2.4776, "step": 412000 }, { "epoch": 10.56, "learning_rate": 1.481026061413396e-05, "loss": 2.3093, "step": 412100 }, { "epoch": 10.56, "learning_rate": 1.4801721487857362e-05, "loss": 2.4531, "step": 412200 }, { "epoch": 10.56, "learning_rate": 1.4793182361580762e-05, "loss": 2.462, "step": 412300 }, { "epoch": 10.56, "learning_rate": 1.4784643235304164e-05, "loss": 2.4598, "step": 412400 }, { "epoch": 10.57, "learning_rate": 1.4776104109027564e-05, "loss": 2.3987, "step": 412500 }, { "epoch": 10.57, "learning_rate": 1.4767564982750965e-05, "loss": 2.5386, "step": 412600 }, { "epoch": 10.57, "learning_rate": 1.4759025856474365e-05, "loss": 2.4256, "step": 412700 }, { "epoch": 10.57, "learning_rate": 1.4750486730197765e-05, "loss": 2.4934, "step": 412800 }, { "epoch": 10.58, "learning_rate": 1.4741947603921166e-05, "loss": 2.4629, "step": 412900 }, { "epoch": 10.58, "learning_rate": 1.4733408477644566e-05, "loss": 2.6451, "step": 413000 }, { "epoch": 10.58, "learning_rate": 1.4724869351367968e-05, "loss": 2.2613, "step": 413100 }, { "epoch": 10.59, "learning_rate": 1.4716330225091368e-05, "loss": 2.4202, "step": 413200 }, { "epoch": 10.59, "learning_rate": 1.470779109881477e-05, "loss": 2.5201, "step": 413300 }, { "epoch": 10.59, "learning_rate": 1.4699251972538169e-05, "loss": 2.5148, "step": 413400 }, { "epoch": 10.59, "learning_rate": 1.469071284626157e-05, "loss": 2.4361, "step": 413500 }, { "epoch": 10.6, "learning_rate": 1.468217371998497e-05, "loss": 2.2988, "step": 413600 }, { "epoch": 10.6, "learning_rate": 1.4673634593708372e-05, "loss": 2.3311, "step": 413700 }, { "epoch": 10.6, "learning_rate": 1.4665095467431772e-05, "loss": 2.5351, "step": 413800 }, { "epoch": 10.6, "learning_rate": 1.4656556341155173e-05, "loss": 2.4125, "step": 413900 }, { "epoch": 10.61, "learning_rate": 1.4648017214878573e-05, "loss": 2.3415, "step": 414000 }, { "epoch": 10.61, "learning_rate": 1.4639478088601973e-05, "loss": 2.4118, "step": 414100 }, { "epoch": 10.61, "learning_rate": 1.4630938962325375e-05, "loss": 2.3756, "step": 414200 }, { "epoch": 10.61, "learning_rate": 1.4622399836048774e-05, "loss": 2.5816, "step": 414300 }, { "epoch": 10.62, "learning_rate": 1.4613860709772176e-05, "loss": 2.4063, "step": 414400 }, { "epoch": 10.62, "learning_rate": 1.4605321583495576e-05, "loss": 2.5107, "step": 414500 }, { "epoch": 10.62, "learning_rate": 1.4596782457218977e-05, "loss": 2.3821, "step": 414600 }, { "epoch": 10.62, "learning_rate": 1.4588243330942377e-05, "loss": 2.4562, "step": 414700 }, { "epoch": 10.63, "learning_rate": 1.4579704204665779e-05, "loss": 2.3442, "step": 414800 }, { "epoch": 10.63, "learning_rate": 1.4571165078389179e-05, "loss": 2.4321, "step": 414900 }, { "epoch": 10.63, "learning_rate": 1.456262595211258e-05, "loss": 2.2946, "step": 415000 }, { "epoch": 10.63, "learning_rate": 1.455408682583598e-05, "loss": 2.2737, "step": 415100 }, { "epoch": 10.64, "learning_rate": 1.4545547699559382e-05, "loss": 2.4658, "step": 415200 }, { "epoch": 10.64, "learning_rate": 1.4537008573282782e-05, "loss": 2.4046, "step": 415300 }, { "epoch": 10.64, "learning_rate": 1.4528469447006181e-05, "loss": 2.443, "step": 415400 }, { "epoch": 10.64, "learning_rate": 1.4519930320729583e-05, "loss": 2.4225, "step": 415500 }, { "epoch": 10.65, "learning_rate": 1.4511391194452983e-05, "loss": 2.454, "step": 415600 }, { "epoch": 10.65, "learning_rate": 1.4502852068176384e-05, "loss": 2.5816, "step": 415700 }, { "epoch": 10.65, "learning_rate": 1.4494312941899784e-05, "loss": 2.3252, "step": 415800 }, { "epoch": 10.65, "learning_rate": 1.4485773815623186e-05, "loss": 2.192, "step": 415900 }, { "epoch": 10.66, "learning_rate": 1.4477234689346586e-05, "loss": 2.4979, "step": 416000 }, { "epoch": 10.66, "learning_rate": 1.4468695563069987e-05, "loss": 2.4697, "step": 416100 }, { "epoch": 10.66, "learning_rate": 1.4460156436793387e-05, "loss": 2.4549, "step": 416200 }, { "epoch": 10.66, "learning_rate": 1.4451617310516789e-05, "loss": 2.353, "step": 416300 }, { "epoch": 10.67, "learning_rate": 1.4443078184240188e-05, "loss": 2.3207, "step": 416400 }, { "epoch": 10.67, "learning_rate": 1.443453905796359e-05, "loss": 2.4548, "step": 416500 }, { "epoch": 10.67, "learning_rate": 1.442599993168699e-05, "loss": 2.54, "step": 416600 }, { "epoch": 10.67, "learning_rate": 1.441746080541039e-05, "loss": 2.5123, "step": 416700 }, { "epoch": 10.68, "learning_rate": 1.4408921679133791e-05, "loss": 2.3531, "step": 416800 }, { "epoch": 10.68, "learning_rate": 1.4400382552857191e-05, "loss": 2.5471, "step": 416900 }, { "epoch": 10.68, "learning_rate": 1.4391843426580593e-05, "loss": 2.3131, "step": 417000 }, { "epoch": 10.69, "learning_rate": 1.4383304300303992e-05, "loss": 2.3687, "step": 417100 }, { "epoch": 10.69, "learning_rate": 1.4374765174027394e-05, "loss": 2.5053, "step": 417200 }, { "epoch": 10.69, "learning_rate": 1.4366226047750794e-05, "loss": 2.3076, "step": 417300 }, { "epoch": 10.69, "learning_rate": 1.4357686921474195e-05, "loss": 2.5531, "step": 417400 }, { "epoch": 10.7, "learning_rate": 1.4349147795197595e-05, "loss": 2.5181, "step": 417500 }, { "epoch": 10.7, "learning_rate": 1.4340608668920997e-05, "loss": 2.4228, "step": 417600 }, { "epoch": 10.7, "learning_rate": 1.4332069542644397e-05, "loss": 2.529, "step": 417700 }, { "epoch": 10.7, "learning_rate": 1.4323530416367798e-05, "loss": 2.3639, "step": 417800 }, { "epoch": 10.71, "learning_rate": 1.4314991290091198e-05, "loss": 2.301, "step": 417900 }, { "epoch": 10.71, "learning_rate": 1.43064521638146e-05, "loss": 2.5247, "step": 418000 }, { "epoch": 10.71, "learning_rate": 1.4297913037538e-05, "loss": 2.4488, "step": 418100 }, { "epoch": 10.71, "learning_rate": 1.42893739112614e-05, "loss": 2.5862, "step": 418200 }, { "epoch": 10.72, "learning_rate": 1.42808347849848e-05, "loss": 2.1738, "step": 418300 }, { "epoch": 10.72, "learning_rate": 1.42722956587082e-05, "loss": 2.4319, "step": 418400 }, { "epoch": 10.72, "learning_rate": 1.4263756532431602e-05, "loss": 2.3571, "step": 418500 }, { "epoch": 10.72, "learning_rate": 1.4255217406155002e-05, "loss": 2.3887, "step": 418600 }, { "epoch": 10.73, "learning_rate": 1.4246678279878404e-05, "loss": 2.5333, "step": 418700 }, { "epoch": 10.73, "learning_rate": 1.4238139153601804e-05, "loss": 2.3024, "step": 418800 }, { "epoch": 10.73, "learning_rate": 1.4229600027325205e-05, "loss": 2.4367, "step": 418900 }, { "epoch": 10.73, "learning_rate": 1.4221060901048605e-05, "loss": 2.4616, "step": 419000 }, { "epoch": 10.74, "learning_rate": 1.4212521774772006e-05, "loss": 2.5386, "step": 419100 }, { "epoch": 10.74, "learning_rate": 1.4203982648495406e-05, "loss": 2.4597, "step": 419200 }, { "epoch": 10.74, "learning_rate": 1.4195443522218808e-05, "loss": 2.3248, "step": 419300 }, { "epoch": 10.74, "learning_rate": 1.4186904395942208e-05, "loss": 2.5811, "step": 419400 }, { "epoch": 10.75, "learning_rate": 1.4178365269665608e-05, "loss": 2.3857, "step": 419500 }, { "epoch": 10.75, "learning_rate": 1.4169826143389009e-05, "loss": 2.2709, "step": 419600 }, { "epoch": 10.75, "learning_rate": 1.4161287017112409e-05, "loss": 2.4978, "step": 419700 }, { "epoch": 10.75, "learning_rate": 1.415274789083581e-05, "loss": 2.4434, "step": 419800 }, { "epoch": 10.76, "learning_rate": 1.414420876455921e-05, "loss": 2.4206, "step": 419900 }, { "epoch": 10.76, "learning_rate": 1.4135669638282612e-05, "loss": 2.3937, "step": 420000 }, { "epoch": 10.76, "learning_rate": 1.4127130512006012e-05, "loss": 2.3275, "step": 420100 }, { "epoch": 10.76, "learning_rate": 1.4118591385729413e-05, "loss": 2.3028, "step": 420200 }, { "epoch": 10.77, "learning_rate": 1.4110052259452813e-05, "loss": 2.458, "step": 420300 }, { "epoch": 10.77, "learning_rate": 1.4101513133176215e-05, "loss": 2.5485, "step": 420400 }, { "epoch": 10.77, "learning_rate": 1.4092974006899615e-05, "loss": 2.4615, "step": 420500 }, { "epoch": 10.77, "learning_rate": 1.4084434880623016e-05, "loss": 2.4002, "step": 420600 }, { "epoch": 10.78, "learning_rate": 1.4075895754346416e-05, "loss": 2.3733, "step": 420700 }, { "epoch": 10.78, "learning_rate": 1.4067356628069816e-05, "loss": 2.4245, "step": 420800 }, { "epoch": 10.78, "learning_rate": 1.4058817501793217e-05, "loss": 2.3932, "step": 420900 }, { "epoch": 10.78, "learning_rate": 1.4050278375516617e-05, "loss": 2.5509, "step": 421000 }, { "epoch": 10.79, "learning_rate": 1.4041739249240019e-05, "loss": 2.3252, "step": 421100 }, { "epoch": 10.79, "learning_rate": 1.4033200122963419e-05, "loss": 2.4302, "step": 421200 }, { "epoch": 10.79, "learning_rate": 1.402466099668682e-05, "loss": 2.4215, "step": 421300 }, { "epoch": 10.8, "learning_rate": 1.401612187041022e-05, "loss": 2.314, "step": 421400 }, { "epoch": 10.8, "learning_rate": 1.4007582744133622e-05, "loss": 2.3784, "step": 421500 }, { "epoch": 10.8, "learning_rate": 1.3999043617857021e-05, "loss": 2.4432, "step": 421600 }, { "epoch": 10.8, "learning_rate": 1.3990504491580423e-05, "loss": 2.2759, "step": 421700 }, { "epoch": 10.81, "learning_rate": 1.3981965365303823e-05, "loss": 2.2875, "step": 421800 }, { "epoch": 10.81, "learning_rate": 1.3973426239027224e-05, "loss": 2.3403, "step": 421900 }, { "epoch": 10.81, "learning_rate": 1.3964887112750624e-05, "loss": 2.4533, "step": 422000 }, { "epoch": 10.81, "learning_rate": 1.3956347986474024e-05, "loss": 2.3412, "step": 422100 }, { "epoch": 10.82, "learning_rate": 1.3947808860197426e-05, "loss": 2.5796, "step": 422200 }, { "epoch": 10.82, "learning_rate": 1.3939269733920826e-05, "loss": 2.275, "step": 422300 }, { "epoch": 10.82, "learning_rate": 1.3930730607644227e-05, "loss": 2.3748, "step": 422400 }, { "epoch": 10.82, "learning_rate": 1.3922191481367627e-05, "loss": 2.5889, "step": 422500 }, { "epoch": 10.83, "learning_rate": 1.3913652355091028e-05, "loss": 2.5048, "step": 422600 }, { "epoch": 10.83, "learning_rate": 1.3905113228814428e-05, "loss": 2.4468, "step": 422700 }, { "epoch": 10.83, "learning_rate": 1.389657410253783e-05, "loss": 2.229, "step": 422800 }, { "epoch": 10.83, "learning_rate": 1.388803497626123e-05, "loss": 2.4576, "step": 422900 }, { "epoch": 10.84, "learning_rate": 1.3879495849984631e-05, "loss": 2.4826, "step": 423000 }, { "epoch": 10.84, "learning_rate": 1.3870956723708031e-05, "loss": 2.436, "step": 423100 }, { "epoch": 10.84, "learning_rate": 1.3862417597431433e-05, "loss": 2.3486, "step": 423200 }, { "epoch": 10.84, "learning_rate": 1.3853878471154833e-05, "loss": 2.4808, "step": 423300 }, { "epoch": 10.85, "learning_rate": 1.3845339344878232e-05, "loss": 2.5013, "step": 423400 }, { "epoch": 10.85, "learning_rate": 1.3836800218601634e-05, "loss": 2.4706, "step": 423500 }, { "epoch": 10.85, "learning_rate": 1.3828261092325034e-05, "loss": 2.3887, "step": 423600 }, { "epoch": 10.85, "learning_rate": 1.3819721966048435e-05, "loss": 2.3089, "step": 423700 }, { "epoch": 10.86, "learning_rate": 1.3811182839771835e-05, "loss": 2.3603, "step": 423800 }, { "epoch": 10.86, "learning_rate": 1.3802643713495237e-05, "loss": 2.2136, "step": 423900 }, { "epoch": 10.86, "learning_rate": 1.3794104587218637e-05, "loss": 2.3481, "step": 424000 }, { "epoch": 10.86, "learning_rate": 1.3785565460942038e-05, "loss": 2.2901, "step": 424100 }, { "epoch": 10.87, "learning_rate": 1.3777026334665438e-05, "loss": 2.3176, "step": 424200 }, { "epoch": 10.87, "learning_rate": 1.376848720838884e-05, "loss": 2.4147, "step": 424300 }, { "epoch": 10.87, "learning_rate": 1.375994808211224e-05, "loss": 2.5023, "step": 424400 }, { "epoch": 10.87, "learning_rate": 1.3751408955835641e-05, "loss": 2.468, "step": 424500 }, { "epoch": 10.88, "learning_rate": 1.374286982955904e-05, "loss": 2.3227, "step": 424600 }, { "epoch": 10.88, "learning_rate": 1.373433070328244e-05, "loss": 2.3984, "step": 424700 }, { "epoch": 10.88, "learning_rate": 1.3725791577005842e-05, "loss": 2.393, "step": 424800 }, { "epoch": 10.88, "learning_rate": 1.3717252450729242e-05, "loss": 2.4668, "step": 424900 }, { "epoch": 10.89, "learning_rate": 1.3708713324452644e-05, "loss": 2.5237, "step": 425000 }, { "epoch": 10.89, "learning_rate": 1.3700174198176043e-05, "loss": 2.3748, "step": 425100 }, { "epoch": 10.89, "learning_rate": 1.3691635071899445e-05, "loss": 2.3424, "step": 425200 }, { "epoch": 10.9, "learning_rate": 1.3683095945622845e-05, "loss": 2.4823, "step": 425300 }, { "epoch": 10.9, "learning_rate": 1.3674556819346246e-05, "loss": 2.4264, "step": 425400 }, { "epoch": 10.9, "learning_rate": 1.3666017693069646e-05, "loss": 2.459, "step": 425500 }, { "epoch": 10.9, "learning_rate": 1.3657478566793048e-05, "loss": 2.4273, "step": 425600 }, { "epoch": 10.91, "learning_rate": 1.3648939440516448e-05, "loss": 2.3875, "step": 425700 }, { "epoch": 10.91, "learning_rate": 1.364040031423985e-05, "loss": 2.39, "step": 425800 }, { "epoch": 10.91, "learning_rate": 1.3631861187963249e-05, "loss": 2.5833, "step": 425900 }, { "epoch": 10.91, "learning_rate": 1.3623322061686649e-05, "loss": 2.5368, "step": 426000 }, { "epoch": 10.92, "learning_rate": 1.361478293541005e-05, "loss": 2.2617, "step": 426100 }, { "epoch": 10.92, "learning_rate": 1.360624380913345e-05, "loss": 2.4038, "step": 426200 }, { "epoch": 10.92, "learning_rate": 1.3597704682856852e-05, "loss": 2.3052, "step": 426300 }, { "epoch": 10.92, "learning_rate": 1.3589165556580252e-05, "loss": 2.3489, "step": 426400 }, { "epoch": 10.93, "learning_rate": 1.3580626430303653e-05, "loss": 2.4012, "step": 426500 }, { "epoch": 10.93, "learning_rate": 1.3572087304027053e-05, "loss": 2.3726, "step": 426600 }, { "epoch": 10.93, "learning_rate": 1.3563548177750455e-05, "loss": 2.3671, "step": 426700 }, { "epoch": 10.93, "learning_rate": 1.3555009051473855e-05, "loss": 2.3331, "step": 426800 }, { "epoch": 10.94, "learning_rate": 1.3546469925197256e-05, "loss": 2.2823, "step": 426900 }, { "epoch": 10.94, "learning_rate": 1.3537930798920656e-05, "loss": 2.4966, "step": 427000 }, { "epoch": 10.94, "learning_rate": 1.3529391672644057e-05, "loss": 2.233, "step": 427100 }, { "epoch": 10.94, "learning_rate": 1.3520852546367457e-05, "loss": 2.5499, "step": 427200 }, { "epoch": 10.95, "learning_rate": 1.3512313420090857e-05, "loss": 2.4112, "step": 427300 }, { "epoch": 10.95, "learning_rate": 1.3503774293814259e-05, "loss": 2.2821, "step": 427400 }, { "epoch": 10.95, "learning_rate": 1.3495235167537659e-05, "loss": 2.3467, "step": 427500 }, { "epoch": 10.95, "learning_rate": 1.348669604126106e-05, "loss": 2.436, "step": 427600 }, { "epoch": 10.96, "learning_rate": 1.347815691498446e-05, "loss": 2.3273, "step": 427700 }, { "epoch": 10.96, "learning_rate": 1.3469617788707862e-05, "loss": 2.3415, "step": 427800 }, { "epoch": 10.96, "learning_rate": 1.3461078662431261e-05, "loss": 2.4805, "step": 427900 }, { "epoch": 10.96, "learning_rate": 1.3452539536154663e-05, "loss": 2.1796, "step": 428000 }, { "epoch": 10.97, "learning_rate": 1.3444000409878063e-05, "loss": 2.2917, "step": 428100 }, { "epoch": 10.97, "learning_rate": 1.3435461283601464e-05, "loss": 2.3663, "step": 428200 }, { "epoch": 10.97, "learning_rate": 1.3426922157324864e-05, "loss": 2.3382, "step": 428300 }, { "epoch": 10.97, "learning_rate": 1.3418383031048266e-05, "loss": 2.5369, "step": 428400 }, { "epoch": 10.98, "learning_rate": 1.3409843904771666e-05, "loss": 2.4268, "step": 428500 }, { "epoch": 10.98, "learning_rate": 1.3401304778495065e-05, "loss": 2.6242, "step": 428600 }, { "epoch": 10.98, "learning_rate": 1.3392765652218467e-05, "loss": 2.2714, "step": 428700 }, { "epoch": 10.98, "learning_rate": 1.3384226525941867e-05, "loss": 2.5422, "step": 428800 }, { "epoch": 10.99, "learning_rate": 1.3375687399665268e-05, "loss": 2.4616, "step": 428900 }, { "epoch": 10.99, "learning_rate": 1.3367148273388668e-05, "loss": 2.47, "step": 429000 }, { "epoch": 10.99, "learning_rate": 1.335860914711207e-05, "loss": 2.4516, "step": 429100 }, { "epoch": 10.99, "learning_rate": 1.335007002083547e-05, "loss": 2.26, "step": 429200 }, { "epoch": 11.0, "learning_rate": 1.3341530894558871e-05, "loss": 2.2387, "step": 429300 }, { "epoch": 11.0, "learning_rate": 1.3332991768282271e-05, "loss": 2.3875, "step": 429400 }, { "epoch": 11.0, "learning_rate": 1.3324452642005673e-05, "loss": 2.441, "step": 429500 }, { "epoch": 11.01, "learning_rate": 1.3315913515729069e-05, "loss": 2.4267, "step": 429600 }, { "epoch": 11.01, "learning_rate": 1.330737438945247e-05, "loss": 2.5106, "step": 429700 }, { "epoch": 11.01, "learning_rate": 1.329883526317587e-05, "loss": 2.2311, "step": 429800 }, { "epoch": 11.01, "learning_rate": 1.3290296136899272e-05, "loss": 2.3798, "step": 429900 }, { "epoch": 11.02, "learning_rate": 1.3281757010622672e-05, "loss": 2.313, "step": 430000 }, { "epoch": 11.02, "learning_rate": 1.3273217884346073e-05, "loss": 2.2904, "step": 430100 }, { "epoch": 11.02, "learning_rate": 1.3264678758069473e-05, "loss": 2.461, "step": 430200 }, { "epoch": 11.02, "learning_rate": 1.3256139631792875e-05, "loss": 2.422, "step": 430300 }, { "epoch": 11.03, "learning_rate": 1.3247600505516275e-05, "loss": 2.5934, "step": 430400 }, { "epoch": 11.03, "learning_rate": 1.3239061379239676e-05, "loss": 2.4207, "step": 430500 }, { "epoch": 11.03, "learning_rate": 1.3230522252963076e-05, "loss": 2.3234, "step": 430600 }, { "epoch": 11.03, "learning_rate": 1.3221983126686478e-05, "loss": 2.2679, "step": 430700 }, { "epoch": 11.04, "learning_rate": 1.3213444000409877e-05, "loss": 2.5224, "step": 430800 }, { "epoch": 11.04, "learning_rate": 1.3204904874133277e-05, "loss": 2.6412, "step": 430900 }, { "epoch": 11.04, "learning_rate": 1.3196365747856679e-05, "loss": 2.3197, "step": 431000 }, { "epoch": 11.04, "learning_rate": 1.3187826621580079e-05, "loss": 2.3307, "step": 431100 }, { "epoch": 11.05, "learning_rate": 1.317928749530348e-05, "loss": 2.3558, "step": 431200 }, { "epoch": 11.05, "learning_rate": 1.317074836902688e-05, "loss": 2.4468, "step": 431300 }, { "epoch": 11.05, "learning_rate": 1.3162209242750282e-05, "loss": 2.2056, "step": 431400 }, { "epoch": 11.05, "learning_rate": 1.3153670116473682e-05, "loss": 2.4161, "step": 431500 }, { "epoch": 11.06, "learning_rate": 1.3145130990197083e-05, "loss": 2.3587, "step": 431600 }, { "epoch": 11.06, "learning_rate": 1.3136591863920483e-05, "loss": 2.488, "step": 431700 }, { "epoch": 11.06, "learning_rate": 1.3128052737643884e-05, "loss": 2.3696, "step": 431800 }, { "epoch": 11.06, "learning_rate": 1.3119513611367284e-05, "loss": 2.4558, "step": 431900 }, { "epoch": 11.07, "learning_rate": 1.3110974485090686e-05, "loss": 2.3515, "step": 432000 }, { "epoch": 11.07, "learning_rate": 1.3102435358814086e-05, "loss": 2.1881, "step": 432100 }, { "epoch": 11.07, "learning_rate": 1.3093896232537486e-05, "loss": 2.3457, "step": 432200 }, { "epoch": 11.07, "learning_rate": 1.3085357106260887e-05, "loss": 2.3209, "step": 432300 }, { "epoch": 11.08, "learning_rate": 1.3076817979984287e-05, "loss": 2.3805, "step": 432400 }, { "epoch": 11.08, "learning_rate": 1.3068278853707689e-05, "loss": 2.4787, "step": 432500 }, { "epoch": 11.08, "learning_rate": 1.3059739727431088e-05, "loss": 2.4628, "step": 432600 }, { "epoch": 11.08, "learning_rate": 1.305120060115449e-05, "loss": 2.4036, "step": 432700 }, { "epoch": 11.09, "learning_rate": 1.304266147487789e-05, "loss": 2.2851, "step": 432800 }, { "epoch": 11.09, "learning_rate": 1.3034122348601291e-05, "loss": 2.3833, "step": 432900 }, { "epoch": 11.09, "learning_rate": 1.3025583222324691e-05, "loss": 2.2168, "step": 433000 }, { "epoch": 11.09, "learning_rate": 1.3017044096048093e-05, "loss": 2.4524, "step": 433100 }, { "epoch": 11.1, "learning_rate": 1.3008504969771493e-05, "loss": 2.3643, "step": 433200 }, { "epoch": 11.1, "learning_rate": 1.2999965843494894e-05, "loss": 2.335, "step": 433300 }, { "epoch": 11.1, "learning_rate": 1.2991426717218294e-05, "loss": 2.457, "step": 433400 }, { "epoch": 11.11, "learning_rate": 1.2982887590941694e-05, "loss": 2.2603, "step": 433500 }, { "epoch": 11.11, "learning_rate": 1.2974348464665095e-05, "loss": 2.3553, "step": 433600 }, { "epoch": 11.11, "learning_rate": 1.2965809338388495e-05, "loss": 2.4858, "step": 433700 }, { "epoch": 11.11, "learning_rate": 1.2957270212111897e-05, "loss": 2.5419, "step": 433800 }, { "epoch": 11.12, "learning_rate": 1.2948731085835297e-05, "loss": 2.3164, "step": 433900 }, { "epoch": 11.12, "learning_rate": 1.2940191959558698e-05, "loss": 2.333, "step": 434000 }, { "epoch": 11.12, "learning_rate": 1.2931652833282098e-05, "loss": 2.2533, "step": 434100 }, { "epoch": 11.12, "learning_rate": 1.29231137070055e-05, "loss": 2.5749, "step": 434200 }, { "epoch": 11.13, "learning_rate": 1.29145745807289e-05, "loss": 2.4057, "step": 434300 }, { "epoch": 11.13, "learning_rate": 1.2906035454452301e-05, "loss": 2.3012, "step": 434400 }, { "epoch": 11.13, "learning_rate": 1.2897496328175701e-05, "loss": 2.4791, "step": 434500 }, { "epoch": 11.13, "learning_rate": 1.2888957201899102e-05, "loss": 2.4412, "step": 434600 }, { "epoch": 11.14, "learning_rate": 1.2880418075622502e-05, "loss": 2.3738, "step": 434700 }, { "epoch": 11.14, "learning_rate": 1.2871878949345904e-05, "loss": 2.3775, "step": 434800 }, { "epoch": 11.14, "learning_rate": 1.2863339823069304e-05, "loss": 2.391, "step": 434900 }, { "epoch": 11.14, "learning_rate": 1.2854800696792704e-05, "loss": 2.437, "step": 435000 }, { "epoch": 11.15, "learning_rate": 1.2846261570516105e-05, "loss": 2.4915, "step": 435100 }, { "epoch": 11.15, "learning_rate": 1.2837722444239505e-05, "loss": 2.5653, "step": 435200 }, { "epoch": 11.15, "learning_rate": 1.2829183317962906e-05, "loss": 2.3941, "step": 435300 }, { "epoch": 11.15, "learning_rate": 1.2820644191686306e-05, "loss": 2.6094, "step": 435400 }, { "epoch": 11.16, "learning_rate": 1.2812105065409708e-05, "loss": 2.3746, "step": 435500 }, { "epoch": 11.16, "learning_rate": 1.2803565939133108e-05, "loss": 2.5318, "step": 435600 }, { "epoch": 11.16, "learning_rate": 1.279502681285651e-05, "loss": 2.3462, "step": 435700 }, { "epoch": 11.16, "learning_rate": 1.2786487686579909e-05, "loss": 2.3414, "step": 435800 }, { "epoch": 11.17, "learning_rate": 1.277794856030331e-05, "loss": 2.4234, "step": 435900 }, { "epoch": 11.17, "learning_rate": 1.276940943402671e-05, "loss": 2.3881, "step": 436000 }, { "epoch": 11.17, "learning_rate": 1.2760870307750112e-05, "loss": 2.4492, "step": 436100 }, { "epoch": 11.17, "learning_rate": 1.2752331181473512e-05, "loss": 2.3043, "step": 436200 }, { "epoch": 11.18, "learning_rate": 1.2743792055196912e-05, "loss": 2.2461, "step": 436300 }, { "epoch": 11.18, "learning_rate": 1.2735252928920313e-05, "loss": 2.4854, "step": 436400 }, { "epoch": 11.18, "learning_rate": 1.2726713802643713e-05, "loss": 2.3799, "step": 436500 }, { "epoch": 11.18, "learning_rate": 1.2718174676367115e-05, "loss": 2.4182, "step": 436600 }, { "epoch": 11.19, "learning_rate": 1.2709635550090515e-05, "loss": 2.3611, "step": 436700 }, { "epoch": 11.19, "learning_rate": 1.2701096423813916e-05, "loss": 2.3318, "step": 436800 }, { "epoch": 11.19, "learning_rate": 1.2692557297537316e-05, "loss": 2.3417, "step": 436900 }, { "epoch": 11.19, "learning_rate": 1.2684018171260718e-05, "loss": 2.5033, "step": 437000 }, { "epoch": 11.2, "learning_rate": 1.2675479044984117e-05, "loss": 2.371, "step": 437100 }, { "epoch": 11.2, "learning_rate": 1.2666939918707519e-05, "loss": 2.4119, "step": 437200 }, { "epoch": 11.2, "learning_rate": 1.2658400792430919e-05, "loss": 2.3906, "step": 437300 }, { "epoch": 11.21, "learning_rate": 1.264986166615432e-05, "loss": 2.3757, "step": 437400 }, { "epoch": 11.21, "learning_rate": 1.264132253987772e-05, "loss": 2.3541, "step": 437500 }, { "epoch": 11.21, "learning_rate": 1.263278341360112e-05, "loss": 2.3389, "step": 437600 }, { "epoch": 11.21, "learning_rate": 1.2624244287324522e-05, "loss": 2.4163, "step": 437700 }, { "epoch": 11.22, "learning_rate": 1.2615705161047921e-05, "loss": 2.2918, "step": 437800 }, { "epoch": 11.22, "learning_rate": 1.2607166034771323e-05, "loss": 2.4072, "step": 437900 }, { "epoch": 11.22, "learning_rate": 1.2598626908494723e-05, "loss": 2.2518, "step": 438000 }, { "epoch": 11.22, "learning_rate": 1.2590087782218124e-05, "loss": 2.4738, "step": 438100 }, { "epoch": 11.23, "learning_rate": 1.2581548655941524e-05, "loss": 2.2788, "step": 438200 }, { "epoch": 11.23, "learning_rate": 1.2573009529664926e-05, "loss": 2.3523, "step": 438300 }, { "epoch": 11.23, "learning_rate": 1.2564470403388326e-05, "loss": 2.5445, "step": 438400 }, { "epoch": 11.23, "learning_rate": 1.2555931277111727e-05, "loss": 2.391, "step": 438500 }, { "epoch": 11.24, "learning_rate": 1.2547392150835127e-05, "loss": 2.4425, "step": 438600 }, { "epoch": 11.24, "learning_rate": 1.2538853024558529e-05, "loss": 2.4095, "step": 438700 }, { "epoch": 11.24, "learning_rate": 1.2530313898281928e-05, "loss": 2.5371, "step": 438800 }, { "epoch": 11.24, "learning_rate": 1.2521774772005328e-05, "loss": 2.3896, "step": 438900 }, { "epoch": 11.25, "learning_rate": 1.251323564572873e-05, "loss": 2.4364, "step": 439000 }, { "epoch": 11.25, "learning_rate": 1.250469651945213e-05, "loss": 2.4755, "step": 439100 }, { "epoch": 11.25, "learning_rate": 1.2496157393175531e-05, "loss": 2.2854, "step": 439200 }, { "epoch": 11.25, "learning_rate": 1.2487618266898931e-05, "loss": 2.5707, "step": 439300 }, { "epoch": 11.26, "learning_rate": 1.2479079140622333e-05, "loss": 2.3527, "step": 439400 }, { "epoch": 11.26, "learning_rate": 1.2470540014345733e-05, "loss": 2.4184, "step": 439500 }, { "epoch": 11.26, "learning_rate": 1.2462000888069134e-05, "loss": 2.2633, "step": 439600 }, { "epoch": 11.26, "learning_rate": 1.2453461761792534e-05, "loss": 2.2966, "step": 439700 }, { "epoch": 11.27, "learning_rate": 1.2444922635515936e-05, "loss": 2.3354, "step": 439800 }, { "epoch": 11.27, "learning_rate": 1.2436383509239335e-05, "loss": 2.2734, "step": 439900 }, { "epoch": 11.27, "learning_rate": 1.2427844382962737e-05, "loss": 2.2729, "step": 440000 }, { "epoch": 11.27, "learning_rate": 1.2419305256686137e-05, "loss": 2.4983, "step": 440100 }, { "epoch": 11.28, "learning_rate": 1.2410766130409537e-05, "loss": 2.3772, "step": 440200 }, { "epoch": 11.28, "learning_rate": 1.2402227004132938e-05, "loss": 2.4128, "step": 440300 }, { "epoch": 11.28, "learning_rate": 1.2393687877856338e-05, "loss": 2.3746, "step": 440400 }, { "epoch": 11.28, "learning_rate": 1.238514875157974e-05, "loss": 2.4524, "step": 440500 }, { "epoch": 11.29, "learning_rate": 1.237660962530314e-05, "loss": 2.41, "step": 440600 }, { "epoch": 11.29, "learning_rate": 1.2368070499026541e-05, "loss": 2.3369, "step": 440700 }, { "epoch": 11.29, "learning_rate": 1.235953137274994e-05, "loss": 2.3432, "step": 440800 }, { "epoch": 11.29, "learning_rate": 1.2350992246473342e-05, "loss": 2.4829, "step": 440900 }, { "epoch": 11.3, "learning_rate": 1.2342453120196742e-05, "loss": 2.4246, "step": 441000 }, { "epoch": 11.3, "learning_rate": 1.2333913993920144e-05, "loss": 2.5314, "step": 441100 }, { "epoch": 11.3, "learning_rate": 1.2325374867643544e-05, "loss": 2.3748, "step": 441200 }, { "epoch": 11.3, "learning_rate": 1.2316835741366945e-05, "loss": 2.4952, "step": 441300 }, { "epoch": 11.31, "learning_rate": 1.2308296615090345e-05, "loss": 2.3488, "step": 441400 }, { "epoch": 11.31, "learning_rate": 1.2299757488813745e-05, "loss": 2.5501, "step": 441500 }, { "epoch": 11.31, "learning_rate": 1.2291218362537146e-05, "loss": 2.3405, "step": 441600 }, { "epoch": 11.32, "learning_rate": 1.2282679236260546e-05, "loss": 2.5885, "step": 441700 }, { "epoch": 11.32, "learning_rate": 1.2274140109983948e-05, "loss": 2.3693, "step": 441800 }, { "epoch": 11.32, "learning_rate": 1.2265600983707348e-05, "loss": 2.4954, "step": 441900 }, { "epoch": 11.32, "learning_rate": 1.225706185743075e-05, "loss": 2.4335, "step": 442000 }, { "epoch": 11.33, "learning_rate": 1.2248522731154149e-05, "loss": 2.3323, "step": 442100 }, { "epoch": 11.33, "learning_rate": 1.223998360487755e-05, "loss": 2.4281, "step": 442200 }, { "epoch": 11.33, "learning_rate": 1.223144447860095e-05, "loss": 2.4198, "step": 442300 }, { "epoch": 11.33, "learning_rate": 1.2222905352324352e-05, "loss": 2.515, "step": 442400 }, { "epoch": 11.34, "learning_rate": 1.2214366226047752e-05, "loss": 2.2983, "step": 442500 }, { "epoch": 11.34, "learning_rate": 1.2205827099771153e-05, "loss": 2.401, "step": 442600 }, { "epoch": 11.34, "learning_rate": 1.2197287973494552e-05, "loss": 2.3863, "step": 442700 }, { "epoch": 11.34, "learning_rate": 1.2188748847217953e-05, "loss": 2.3331, "step": 442800 }, { "epoch": 11.35, "learning_rate": 1.2180209720941353e-05, "loss": 2.4432, "step": 442900 }, { "epoch": 11.35, "learning_rate": 1.2171670594664755e-05, "loss": 2.4303, "step": 443000 }, { "epoch": 11.35, "learning_rate": 1.2163131468388154e-05, "loss": 2.3619, "step": 443100 }, { "epoch": 11.35, "learning_rate": 1.2154592342111556e-05, "loss": 2.18, "step": 443200 }, { "epoch": 11.36, "learning_rate": 1.2146053215834956e-05, "loss": 2.3913, "step": 443300 }, { "epoch": 11.36, "learning_rate": 1.2137514089558356e-05, "loss": 2.4747, "step": 443400 }, { "epoch": 11.36, "learning_rate": 1.2128974963281757e-05, "loss": 2.3488, "step": 443500 }, { "epoch": 11.36, "learning_rate": 1.2120435837005157e-05, "loss": 2.4871, "step": 443600 }, { "epoch": 11.37, "learning_rate": 1.2111896710728559e-05, "loss": 2.3957, "step": 443700 }, { "epoch": 11.37, "learning_rate": 1.2103357584451958e-05, "loss": 2.5369, "step": 443800 }, { "epoch": 11.37, "learning_rate": 1.209481845817536e-05, "loss": 2.4831, "step": 443900 }, { "epoch": 11.37, "learning_rate": 1.208627933189876e-05, "loss": 2.3505, "step": 444000 }, { "epoch": 11.38, "learning_rate": 1.2077740205622161e-05, "loss": 2.4901, "step": 444100 }, { "epoch": 11.38, "learning_rate": 1.2069201079345561e-05, "loss": 2.3502, "step": 444200 }, { "epoch": 11.38, "learning_rate": 1.2060661953068963e-05, "loss": 2.2579, "step": 444300 }, { "epoch": 11.38, "learning_rate": 1.2052122826792363e-05, "loss": 2.3426, "step": 444400 }, { "epoch": 11.39, "learning_rate": 1.2043583700515764e-05, "loss": 2.2187, "step": 444500 }, { "epoch": 11.39, "learning_rate": 1.2035044574239164e-05, "loss": 2.2769, "step": 444600 }, { "epoch": 11.39, "learning_rate": 1.2026505447962564e-05, "loss": 2.4459, "step": 444700 }, { "epoch": 11.39, "learning_rate": 1.2017966321685965e-05, "loss": 2.3774, "step": 444800 }, { "epoch": 11.4, "learning_rate": 1.2009427195409365e-05, "loss": 2.4176, "step": 444900 }, { "epoch": 11.4, "learning_rate": 1.2000888069132767e-05, "loss": 2.3299, "step": 445000 }, { "epoch": 11.4, "learning_rate": 1.1992348942856167e-05, "loss": 2.2312, "step": 445100 }, { "epoch": 11.4, "learning_rate": 1.1983809816579568e-05, "loss": 2.3771, "step": 445200 }, { "epoch": 11.41, "learning_rate": 1.1975270690302968e-05, "loss": 2.3815, "step": 445300 }, { "epoch": 11.41, "learning_rate": 1.196673156402637e-05, "loss": 2.3026, "step": 445400 }, { "epoch": 11.41, "learning_rate": 1.195819243774977e-05, "loss": 2.486, "step": 445500 }, { "epoch": 11.42, "learning_rate": 1.1949653311473171e-05, "loss": 2.4579, "step": 445600 }, { "epoch": 11.42, "learning_rate": 1.1941114185196571e-05, "loss": 2.298, "step": 445700 }, { "epoch": 11.42, "learning_rate": 1.1932575058919972e-05, "loss": 2.2979, "step": 445800 }, { "epoch": 11.42, "learning_rate": 1.1924035932643372e-05, "loss": 2.5232, "step": 445900 }, { "epoch": 11.43, "learning_rate": 1.1915496806366772e-05, "loss": 2.289, "step": 446000 }, { "epoch": 11.43, "learning_rate": 1.1906957680090174e-05, "loss": 2.2656, "step": 446100 }, { "epoch": 11.43, "learning_rate": 1.1898418553813574e-05, "loss": 2.3435, "step": 446200 }, { "epoch": 11.43, "learning_rate": 1.1889879427536975e-05, "loss": 2.3753, "step": 446300 }, { "epoch": 11.44, "learning_rate": 1.1881340301260375e-05, "loss": 2.4717, "step": 446400 }, { "epoch": 11.44, "learning_rate": 1.1872801174983777e-05, "loss": 2.4375, "step": 446500 }, { "epoch": 11.44, "learning_rate": 1.1864262048707176e-05, "loss": 2.3796, "step": 446600 }, { "epoch": 11.44, "learning_rate": 1.1855722922430578e-05, "loss": 2.3647, "step": 446700 }, { "epoch": 11.45, "learning_rate": 1.1847183796153978e-05, "loss": 2.5385, "step": 446800 }, { "epoch": 11.45, "learning_rate": 1.183864466987738e-05, "loss": 2.3397, "step": 446900 }, { "epoch": 11.45, "learning_rate": 1.183010554360078e-05, "loss": 2.3079, "step": 447000 }, { "epoch": 11.45, "learning_rate": 1.182156641732418e-05, "loss": 2.4263, "step": 447100 }, { "epoch": 11.46, "learning_rate": 1.181302729104758e-05, "loss": 2.3297, "step": 447200 }, { "epoch": 11.46, "learning_rate": 1.180448816477098e-05, "loss": 2.2843, "step": 447300 }, { "epoch": 11.46, "learning_rate": 1.1795949038494382e-05, "loss": 2.3753, "step": 447400 }, { "epoch": 11.46, "learning_rate": 1.1787409912217782e-05, "loss": 2.448, "step": 447500 }, { "epoch": 11.47, "learning_rate": 1.1778870785941183e-05, "loss": 2.2821, "step": 447600 }, { "epoch": 11.47, "learning_rate": 1.1770331659664583e-05, "loss": 2.4746, "step": 447700 }, { "epoch": 11.47, "learning_rate": 1.1761792533387985e-05, "loss": 2.4732, "step": 447800 }, { "epoch": 11.47, "learning_rate": 1.1753253407111385e-05, "loss": 2.3822, "step": 447900 }, { "epoch": 11.48, "learning_rate": 1.1744714280834786e-05, "loss": 2.3055, "step": 448000 }, { "epoch": 11.48, "learning_rate": 1.1736175154558186e-05, "loss": 2.2832, "step": 448100 }, { "epoch": 11.48, "learning_rate": 1.1727636028281588e-05, "loss": 2.4172, "step": 448200 }, { "epoch": 11.48, "learning_rate": 1.1719096902004987e-05, "loss": 2.5879, "step": 448300 }, { "epoch": 11.49, "learning_rate": 1.1710557775728389e-05, "loss": 2.3739, "step": 448400 }, { "epoch": 11.49, "learning_rate": 1.1702018649451789e-05, "loss": 2.412, "step": 448500 }, { "epoch": 11.49, "learning_rate": 1.1693479523175189e-05, "loss": 2.2493, "step": 448600 }, { "epoch": 11.49, "learning_rate": 1.168494039689859e-05, "loss": 2.5437, "step": 448700 }, { "epoch": 11.5, "learning_rate": 1.167640127062199e-05, "loss": 2.4373, "step": 448800 }, { "epoch": 11.5, "learning_rate": 1.1667862144345392e-05, "loss": 2.3772, "step": 448900 }, { "epoch": 11.5, "learning_rate": 1.1659323018068792e-05, "loss": 2.491, "step": 449000 }, { "epoch": 11.5, "learning_rate": 1.1650783891792193e-05, "loss": 2.3499, "step": 449100 }, { "epoch": 11.51, "learning_rate": 1.1642244765515593e-05, "loss": 2.4852, "step": 449200 }, { "epoch": 11.51, "learning_rate": 1.1633705639238994e-05, "loss": 2.2343, "step": 449300 }, { "epoch": 11.51, "learning_rate": 1.1625166512962394e-05, "loss": 2.2328, "step": 449400 }, { "epoch": 11.52, "learning_rate": 1.1616627386685796e-05, "loss": 2.293, "step": 449500 }, { "epoch": 11.52, "learning_rate": 1.1608088260409196e-05, "loss": 2.2728, "step": 449600 }, { "epoch": 11.52, "learning_rate": 1.1599549134132597e-05, "loss": 2.3773, "step": 449700 }, { "epoch": 11.52, "learning_rate": 1.1591010007855997e-05, "loss": 2.5609, "step": 449800 }, { "epoch": 11.53, "learning_rate": 1.1582470881579397e-05, "loss": 2.3769, "step": 449900 }, { "epoch": 11.53, "learning_rate": 1.1573931755302799e-05, "loss": 2.4467, "step": 450000 }, { "epoch": 11.53, "learning_rate": 1.1565392629026198e-05, "loss": 2.4738, "step": 450100 }, { "epoch": 11.53, "learning_rate": 1.15568535027496e-05, "loss": 2.3608, "step": 450200 }, { "epoch": 11.54, "learning_rate": 1.1548314376473e-05, "loss": 2.307, "step": 450300 }, { "epoch": 11.54, "learning_rate": 1.1539775250196401e-05, "loss": 2.386, "step": 450400 }, { "epoch": 11.54, "learning_rate": 1.1531236123919801e-05, "loss": 2.5637, "step": 450500 }, { "epoch": 11.54, "learning_rate": 1.1522696997643203e-05, "loss": 2.3824, "step": 450600 }, { "epoch": 11.55, "learning_rate": 1.1514157871366603e-05, "loss": 2.4299, "step": 450700 }, { "epoch": 11.55, "learning_rate": 1.1505618745090004e-05, "loss": 2.359, "step": 450800 }, { "epoch": 11.55, "learning_rate": 1.1497079618813404e-05, "loss": 2.4056, "step": 450900 }, { "epoch": 11.55, "learning_rate": 1.1488540492536806e-05, "loss": 2.4729, "step": 451000 }, { "epoch": 11.56, "learning_rate": 1.1480001366260205e-05, "loss": 2.4801, "step": 451100 }, { "epoch": 11.56, "learning_rate": 1.1471462239983605e-05, "loss": 2.4627, "step": 451200 }, { "epoch": 11.56, "learning_rate": 1.1462923113707007e-05, "loss": 2.2652, "step": 451300 }, { "epoch": 11.56, "learning_rate": 1.1454383987430407e-05, "loss": 2.3068, "step": 451400 }, { "epoch": 11.57, "learning_rate": 1.1445844861153807e-05, "loss": 2.4512, "step": 451500 }, { "epoch": 11.57, "learning_rate": 1.1437305734877208e-05, "loss": 2.382, "step": 451600 }, { "epoch": 11.57, "learning_rate": 1.1428766608600608e-05, "loss": 2.2647, "step": 451700 }, { "epoch": 11.57, "learning_rate": 1.1420227482324008e-05, "loss": 2.3449, "step": 451800 }, { "epoch": 11.58, "learning_rate": 1.141168835604741e-05, "loss": 2.3118, "step": 451900 }, { "epoch": 11.58, "learning_rate": 1.140314922977081e-05, "loss": 2.4311, "step": 452000 }, { "epoch": 11.58, "learning_rate": 1.139461010349421e-05, "loss": 2.2573, "step": 452100 }, { "epoch": 11.58, "learning_rate": 1.138607097721761e-05, "loss": 2.2957, "step": 452200 }, { "epoch": 11.59, "learning_rate": 1.1377531850941012e-05, "loss": 2.4046, "step": 452300 }, { "epoch": 11.59, "learning_rate": 1.1368992724664412e-05, "loss": 2.3726, "step": 452400 }, { "epoch": 11.59, "learning_rate": 1.1360453598387814e-05, "loss": 2.2297, "step": 452500 }, { "epoch": 11.59, "learning_rate": 1.1351914472111213e-05, "loss": 2.4903, "step": 452600 }, { "epoch": 11.6, "learning_rate": 1.1343375345834615e-05, "loss": 2.2608, "step": 452700 }, { "epoch": 11.6, "learning_rate": 1.1334836219558015e-05, "loss": 2.2555, "step": 452800 }, { "epoch": 11.6, "learning_rate": 1.1326297093281416e-05, "loss": 2.4198, "step": 452900 }, { "epoch": 11.6, "learning_rate": 1.1317757967004816e-05, "loss": 2.2123, "step": 453000 }, { "epoch": 11.61, "learning_rate": 1.1309218840728216e-05, "loss": 2.4742, "step": 453100 }, { "epoch": 11.61, "learning_rate": 1.1300679714451618e-05, "loss": 2.413, "step": 453200 }, { "epoch": 11.61, "learning_rate": 1.1292140588175017e-05, "loss": 2.5419, "step": 453300 }, { "epoch": 11.61, "learning_rate": 1.1283601461898419e-05, "loss": 2.3603, "step": 453400 }, { "epoch": 11.62, "learning_rate": 1.1275062335621819e-05, "loss": 2.5009, "step": 453500 }, { "epoch": 11.62, "learning_rate": 1.126652320934522e-05, "loss": 2.3219, "step": 453600 }, { "epoch": 11.62, "learning_rate": 1.125798408306862e-05, "loss": 2.2979, "step": 453700 }, { "epoch": 11.63, "learning_rate": 1.1249444956792022e-05, "loss": 2.3667, "step": 453800 }, { "epoch": 11.63, "learning_rate": 1.1240905830515422e-05, "loss": 2.4106, "step": 453900 }, { "epoch": 11.63, "learning_rate": 1.1232366704238823e-05, "loss": 2.4102, "step": 454000 }, { "epoch": 11.63, "learning_rate": 1.1223827577962223e-05, "loss": 2.5527, "step": 454100 }, { "epoch": 11.64, "learning_rate": 1.1215288451685625e-05, "loss": 2.2172, "step": 454200 }, { "epoch": 11.64, "learning_rate": 1.1206749325409024e-05, "loss": 2.5036, "step": 454300 }, { "epoch": 11.64, "learning_rate": 1.1198210199132424e-05, "loss": 2.4123, "step": 454400 }, { "epoch": 11.64, "learning_rate": 1.1189671072855826e-05, "loss": 2.5316, "step": 454500 }, { "epoch": 11.65, "learning_rate": 1.1181131946579226e-05, "loss": 2.126, "step": 454600 }, { "epoch": 11.65, "learning_rate": 1.1172592820302627e-05, "loss": 2.4036, "step": 454700 }, { "epoch": 11.65, "learning_rate": 1.1164053694026027e-05, "loss": 2.3227, "step": 454800 }, { "epoch": 11.65, "learning_rate": 1.1155514567749429e-05, "loss": 2.5125, "step": 454900 }, { "epoch": 11.66, "learning_rate": 1.1146975441472829e-05, "loss": 2.3822, "step": 455000 }, { "epoch": 11.66, "learning_rate": 1.113843631519623e-05, "loss": 2.2891, "step": 455100 }, { "epoch": 11.66, "learning_rate": 1.112989718891963e-05, "loss": 2.3755, "step": 455200 }, { "epoch": 11.66, "learning_rate": 1.1121358062643031e-05, "loss": 2.3895, "step": 455300 }, { "epoch": 11.67, "learning_rate": 1.1112818936366431e-05, "loss": 2.4506, "step": 455400 }, { "epoch": 11.67, "learning_rate": 1.1104279810089833e-05, "loss": 2.3326, "step": 455500 }, { "epoch": 11.67, "learning_rate": 1.1095740683813233e-05, "loss": 2.2889, "step": 455600 }, { "epoch": 11.67, "learning_rate": 1.1087201557536633e-05, "loss": 2.3132, "step": 455700 }, { "epoch": 11.68, "learning_rate": 1.1078662431260034e-05, "loss": 2.5263, "step": 455800 }, { "epoch": 11.68, "learning_rate": 1.1070123304983434e-05, "loss": 2.2508, "step": 455900 }, { "epoch": 11.68, "learning_rate": 1.1061584178706836e-05, "loss": 2.2724, "step": 456000 }, { "epoch": 11.68, "learning_rate": 1.1053045052430235e-05, "loss": 2.3963, "step": 456100 }, { "epoch": 11.69, "learning_rate": 1.1044505926153637e-05, "loss": 2.354, "step": 456200 }, { "epoch": 11.69, "learning_rate": 1.1035966799877037e-05, "loss": 2.3623, "step": 456300 }, { "epoch": 11.69, "learning_rate": 1.1027427673600438e-05, "loss": 2.3601, "step": 456400 }, { "epoch": 11.69, "learning_rate": 1.1018888547323838e-05, "loss": 2.3038, "step": 456500 }, { "epoch": 11.7, "learning_rate": 1.101034942104724e-05, "loss": 2.3352, "step": 456600 }, { "epoch": 11.7, "learning_rate": 1.100181029477064e-05, "loss": 2.4089, "step": 456700 }, { "epoch": 11.7, "learning_rate": 1.0993271168494041e-05, "loss": 2.3569, "step": 456800 }, { "epoch": 11.7, "learning_rate": 1.0984732042217441e-05, "loss": 2.3535, "step": 456900 }, { "epoch": 11.71, "learning_rate": 1.0976192915940841e-05, "loss": 2.3747, "step": 457000 }, { "epoch": 11.71, "learning_rate": 1.0967653789664242e-05, "loss": 2.4065, "step": 457100 }, { "epoch": 11.71, "learning_rate": 1.0959114663387642e-05, "loss": 2.524, "step": 457200 }, { "epoch": 11.71, "learning_rate": 1.0950575537111044e-05, "loss": 2.3989, "step": 457300 }, { "epoch": 11.72, "learning_rate": 1.0942036410834444e-05, "loss": 2.3519, "step": 457400 }, { "epoch": 11.72, "learning_rate": 1.0933497284557845e-05, "loss": 2.4087, "step": 457500 }, { "epoch": 11.72, "learning_rate": 1.0924958158281245e-05, "loss": 2.3264, "step": 457600 }, { "epoch": 11.73, "learning_rate": 1.0916419032004647e-05, "loss": 2.223, "step": 457700 }, { "epoch": 11.73, "learning_rate": 1.0907879905728046e-05, "loss": 2.3941, "step": 457800 }, { "epoch": 11.73, "learning_rate": 1.0899340779451448e-05, "loss": 2.2319, "step": 457900 }, { "epoch": 11.73, "learning_rate": 1.0890801653174848e-05, "loss": 2.2449, "step": 458000 }, { "epoch": 11.74, "learning_rate": 1.088226252689825e-05, "loss": 2.316, "step": 458100 }, { "epoch": 11.74, "learning_rate": 1.087372340062165e-05, "loss": 2.4809, "step": 458200 }, { "epoch": 11.74, "learning_rate": 1.0865184274345049e-05, "loss": 2.2141, "step": 458300 }, { "epoch": 11.74, "learning_rate": 1.085664514806845e-05, "loss": 2.2298, "step": 458400 }, { "epoch": 11.75, "learning_rate": 1.084810602179185e-05, "loss": 2.1899, "step": 458500 }, { "epoch": 11.75, "learning_rate": 1.0839566895515252e-05, "loss": 2.347, "step": 458600 }, { "epoch": 11.75, "learning_rate": 1.0831027769238652e-05, "loss": 2.489, "step": 458700 }, { "epoch": 11.75, "learning_rate": 1.0822488642962053e-05, "loss": 2.2976, "step": 458800 }, { "epoch": 11.76, "learning_rate": 1.0813949516685453e-05, "loss": 2.3948, "step": 458900 }, { "epoch": 11.76, "learning_rate": 1.0805410390408855e-05, "loss": 2.3881, "step": 459000 }, { "epoch": 11.76, "learning_rate": 1.0796871264132255e-05, "loss": 2.2191, "step": 459100 }, { "epoch": 11.76, "learning_rate": 1.0788332137855656e-05, "loss": 2.3434, "step": 459200 }, { "epoch": 11.77, "learning_rate": 1.0779793011579056e-05, "loss": 2.4501, "step": 459300 }, { "epoch": 11.77, "learning_rate": 1.0771253885302458e-05, "loss": 2.3837, "step": 459400 }, { "epoch": 11.77, "learning_rate": 1.0762714759025858e-05, "loss": 2.3998, "step": 459500 }, { "epoch": 11.77, "learning_rate": 1.0754175632749257e-05, "loss": 2.3488, "step": 459600 }, { "epoch": 11.78, "learning_rate": 1.0745636506472659e-05, "loss": 2.3771, "step": 459700 }, { "epoch": 11.78, "learning_rate": 1.0737097380196059e-05, "loss": 2.4188, "step": 459800 }, { "epoch": 11.78, "learning_rate": 1.072855825391946e-05, "loss": 2.4259, "step": 459900 }, { "epoch": 11.78, "learning_rate": 1.072001912764286e-05, "loss": 2.4023, "step": 460000 }, { "epoch": 11.79, "learning_rate": 1.0711480001366262e-05, "loss": 2.4128, "step": 460100 }, { "epoch": 11.79, "learning_rate": 1.070294087508966e-05, "loss": 2.3, "step": 460200 }, { "epoch": 11.79, "learning_rate": 1.0694401748813061e-05, "loss": 2.2961, "step": 460300 }, { "epoch": 11.79, "learning_rate": 1.0685862622536461e-05, "loss": 2.2691, "step": 460400 }, { "epoch": 11.8, "learning_rate": 1.0677323496259863e-05, "loss": 2.3439, "step": 460500 }, { "epoch": 11.8, "learning_rate": 1.0668784369983263e-05, "loss": 2.2766, "step": 460600 }, { "epoch": 11.8, "learning_rate": 1.0660245243706664e-05, "loss": 2.2719, "step": 460700 }, { "epoch": 11.8, "learning_rate": 1.0651706117430064e-05, "loss": 2.4551, "step": 460800 }, { "epoch": 11.81, "learning_rate": 1.0643166991153466e-05, "loss": 2.3891, "step": 460900 }, { "epoch": 11.81, "learning_rate": 1.0634627864876866e-05, "loss": 2.186, "step": 461000 }, { "epoch": 11.81, "learning_rate": 1.0626088738600267e-05, "loss": 2.4296, "step": 461100 }, { "epoch": 11.81, "learning_rate": 1.0617549612323667e-05, "loss": 2.646, "step": 461200 }, { "epoch": 11.82, "learning_rate": 1.0609010486047068e-05, "loss": 2.2722, "step": 461300 }, { "epoch": 11.82, "learning_rate": 1.0600471359770468e-05, "loss": 2.3699, "step": 461400 }, { "epoch": 11.82, "learning_rate": 1.0591932233493868e-05, "loss": 2.5427, "step": 461500 }, { "epoch": 11.82, "learning_rate": 1.058339310721727e-05, "loss": 2.4104, "step": 461600 }, { "epoch": 11.83, "learning_rate": 1.057485398094067e-05, "loss": 2.2132, "step": 461700 }, { "epoch": 11.83, "learning_rate": 1.0566314854664071e-05, "loss": 2.2677, "step": 461800 }, { "epoch": 11.83, "learning_rate": 1.0557775728387471e-05, "loss": 2.5672, "step": 461900 }, { "epoch": 11.84, "learning_rate": 1.0549236602110873e-05, "loss": 2.3104, "step": 462000 }, { "epoch": 11.84, "learning_rate": 1.0540697475834272e-05, "loss": 2.3313, "step": 462100 }, { "epoch": 11.84, "learning_rate": 1.0532158349557674e-05, "loss": 2.2646, "step": 462200 }, { "epoch": 11.84, "learning_rate": 1.0523619223281074e-05, "loss": 2.3769, "step": 462300 }, { "epoch": 11.85, "learning_rate": 1.0515080097004475e-05, "loss": 2.4205, "step": 462400 }, { "epoch": 11.85, "learning_rate": 1.0506540970727875e-05, "loss": 2.4462, "step": 462500 }, { "epoch": 11.85, "learning_rate": 1.0498001844451277e-05, "loss": 2.3421, "step": 462600 }, { "epoch": 11.85, "learning_rate": 1.0489462718174677e-05, "loss": 2.3728, "step": 462700 }, { "epoch": 11.86, "learning_rate": 1.0480923591898076e-05, "loss": 2.2476, "step": 462800 }, { "epoch": 11.86, "learning_rate": 1.0472384465621478e-05, "loss": 2.2497, "step": 462900 }, { "epoch": 11.86, "learning_rate": 1.0463845339344878e-05, "loss": 2.5004, "step": 463000 }, { "epoch": 11.86, "learning_rate": 1.045530621306828e-05, "loss": 2.323, "step": 463100 }, { "epoch": 11.87, "learning_rate": 1.044676708679168e-05, "loss": 2.2561, "step": 463200 }, { "epoch": 11.87, "learning_rate": 1.043822796051508e-05, "loss": 2.4128, "step": 463300 }, { "epoch": 11.87, "learning_rate": 1.042968883423848e-05, "loss": 2.1714, "step": 463400 }, { "epoch": 11.87, "learning_rate": 1.0421149707961882e-05, "loss": 2.475, "step": 463500 }, { "epoch": 11.88, "learning_rate": 1.0412610581685282e-05, "loss": 2.2916, "step": 463600 }, { "epoch": 11.88, "learning_rate": 1.0404071455408684e-05, "loss": 2.1948, "step": 463700 }, { "epoch": 11.88, "learning_rate": 1.0395532329132083e-05, "loss": 2.4248, "step": 463800 }, { "epoch": 11.88, "learning_rate": 1.0386993202855485e-05, "loss": 2.5744, "step": 463900 }, { "epoch": 11.89, "learning_rate": 1.0378454076578885e-05, "loss": 2.2751, "step": 464000 }, { "epoch": 11.89, "learning_rate": 1.0369914950302285e-05, "loss": 2.5041, "step": 464100 }, { "epoch": 11.89, "learning_rate": 1.0361375824025686e-05, "loss": 2.4111, "step": 464200 }, { "epoch": 11.89, "learning_rate": 1.0352836697749086e-05, "loss": 2.3358, "step": 464300 }, { "epoch": 11.9, "learning_rate": 1.0344297571472488e-05, "loss": 2.3797, "step": 464400 }, { "epoch": 11.9, "learning_rate": 1.0335758445195888e-05, "loss": 2.3143, "step": 464500 }, { "epoch": 11.9, "learning_rate": 1.0327219318919289e-05, "loss": 2.4891, "step": 464600 }, { "epoch": 11.9, "learning_rate": 1.0318680192642689e-05, "loss": 2.315, "step": 464700 }, { "epoch": 11.91, "learning_rate": 1.031014106636609e-05, "loss": 2.3748, "step": 464800 }, { "epoch": 11.91, "learning_rate": 1.030160194008949e-05, "loss": 2.3514, "step": 464900 }, { "epoch": 11.91, "learning_rate": 1.0293062813812892e-05, "loss": 2.3933, "step": 465000 }, { "epoch": 11.91, "learning_rate": 1.0284523687536292e-05, "loss": 2.5657, "step": 465100 }, { "epoch": 11.92, "learning_rate": 1.0275984561259693e-05, "loss": 2.3748, "step": 465200 }, { "epoch": 11.92, "learning_rate": 1.0267445434983093e-05, "loss": 2.3294, "step": 465300 }, { "epoch": 11.92, "learning_rate": 1.0258906308706493e-05, "loss": 2.3038, "step": 465400 }, { "epoch": 11.92, "learning_rate": 1.0250367182429895e-05, "loss": 2.3801, "step": 465500 }, { "epoch": 11.93, "learning_rate": 1.0241828056153294e-05, "loss": 2.2168, "step": 465600 }, { "epoch": 11.93, "learning_rate": 1.0233288929876696e-05, "loss": 2.2804, "step": 465700 }, { "epoch": 11.93, "learning_rate": 1.0224749803600096e-05, "loss": 2.3886, "step": 465800 }, { "epoch": 11.94, "learning_rate": 1.0216210677323497e-05, "loss": 2.5101, "step": 465900 }, { "epoch": 11.94, "learning_rate": 1.0207671551046897e-05, "loss": 2.477, "step": 466000 }, { "epoch": 11.94, "learning_rate": 1.0199132424770299e-05, "loss": 2.2089, "step": 466100 }, { "epoch": 11.94, "learning_rate": 1.0190593298493699e-05, "loss": 2.4535, "step": 466200 }, { "epoch": 11.95, "learning_rate": 1.01820541722171e-05, "loss": 2.2373, "step": 466300 }, { "epoch": 11.95, "learning_rate": 1.01735150459405e-05, "loss": 2.3935, "step": 466400 }, { "epoch": 11.95, "learning_rate": 1.0164975919663902e-05, "loss": 2.3327, "step": 466500 }, { "epoch": 11.95, "learning_rate": 1.0156436793387301e-05, "loss": 2.3853, "step": 466600 }, { "epoch": 11.96, "learning_rate": 1.0147897667110701e-05, "loss": 2.2725, "step": 466700 }, { "epoch": 11.96, "learning_rate": 1.0139358540834103e-05, "loss": 2.3891, "step": 466800 }, { "epoch": 11.96, "learning_rate": 1.0130819414557503e-05, "loss": 2.3847, "step": 466900 }, { "epoch": 11.96, "learning_rate": 1.0122280288280904e-05, "loss": 2.6729, "step": 467000 }, { "epoch": 11.97, "learning_rate": 1.0113741162004304e-05, "loss": 2.4303, "step": 467100 }, { "epoch": 11.97, "learning_rate": 1.0105202035727706e-05, "loss": 2.6356, "step": 467200 }, { "epoch": 11.97, "learning_rate": 1.0096662909451105e-05, "loss": 2.3694, "step": 467300 }, { "epoch": 11.97, "learning_rate": 1.0088123783174507e-05, "loss": 2.2721, "step": 467400 }, { "epoch": 11.98, "learning_rate": 1.0079584656897907e-05, "loss": 2.4132, "step": 467500 }, { "epoch": 11.98, "learning_rate": 1.0071045530621308e-05, "loss": 2.3676, "step": 467600 }, { "epoch": 11.98, "learning_rate": 1.0062506404344708e-05, "loss": 2.2973, "step": 467700 }, { "epoch": 11.98, "learning_rate": 1.005396727806811e-05, "loss": 2.2985, "step": 467800 }, { "epoch": 11.99, "learning_rate": 1.004542815179151e-05, "loss": 2.3777, "step": 467900 }, { "epoch": 11.99, "learning_rate": 1.003688902551491e-05, "loss": 2.3674, "step": 468000 }, { "epoch": 11.99, "learning_rate": 1.0028349899238311e-05, "loss": 2.3113, "step": 468100 }, { "epoch": 11.99, "learning_rate": 1.0019810772961711e-05, "loss": 2.3669, "step": 468200 }, { "epoch": 12.0, "learning_rate": 1.0011271646685112e-05, "loss": 2.363, "step": 468300 }, { "epoch": 12.0, "learning_rate": 1.0002732520408512e-05, "loss": 2.3587, "step": 468400 }, { "epoch": 12.0, "learning_rate": 9.994193394131914e-06, "loss": 2.2949, "step": 468500 }, { "epoch": 12.0, "learning_rate": 9.985654267855314e-06, "loss": 2.3748, "step": 468600 }, { "epoch": 12.01, "learning_rate": 9.977115141578715e-06, "loss": 2.3995, "step": 468700 }, { "epoch": 12.01, "learning_rate": 9.968576015302115e-06, "loss": 2.3465, "step": 468800 }, { "epoch": 12.01, "learning_rate": 9.960036889025517e-06, "loss": 2.2719, "step": 468900 }, { "epoch": 12.01, "learning_rate": 9.951497762748915e-06, "loss": 2.1681, "step": 469000 }, { "epoch": 12.02, "learning_rate": 9.942958636472316e-06, "loss": 2.2478, "step": 469100 }, { "epoch": 12.02, "learning_rate": 9.934419510195716e-06, "loss": 2.3767, "step": 469200 }, { "epoch": 12.02, "learning_rate": 9.925880383919118e-06, "loss": 2.3495, "step": 469300 }, { "epoch": 12.02, "learning_rate": 9.917341257642518e-06, "loss": 2.4604, "step": 469400 }, { "epoch": 12.03, "learning_rate": 9.90880213136592e-06, "loss": 2.281, "step": 469500 }, { "epoch": 12.03, "learning_rate": 9.900263005089319e-06, "loss": 2.2163, "step": 469600 }, { "epoch": 12.03, "learning_rate": 9.89172387881272e-06, "loss": 2.2095, "step": 469700 }, { "epoch": 12.04, "learning_rate": 9.88318475253612e-06, "loss": 2.3108, "step": 469800 }, { "epoch": 12.04, "learning_rate": 9.87464562625952e-06, "loss": 2.2822, "step": 469900 }, { "epoch": 12.04, "learning_rate": 9.866106499982922e-06, "loss": 2.4625, "step": 470000 }, { "epoch": 12.04, "learning_rate": 9.857567373706322e-06, "loss": 2.291, "step": 470100 }, { "epoch": 12.05, "learning_rate": 9.849028247429723e-06, "loss": 2.3437, "step": 470200 }, { "epoch": 12.05, "learning_rate": 9.840489121153123e-06, "loss": 2.3757, "step": 470300 }, { "epoch": 12.05, "learning_rate": 9.831949994876525e-06, "loss": 2.4156, "step": 470400 }, { "epoch": 12.05, "learning_rate": 9.823410868599925e-06, "loss": 2.3461, "step": 470500 }, { "epoch": 12.06, "learning_rate": 9.814871742323326e-06, "loss": 2.303, "step": 470600 }, { "epoch": 12.06, "learning_rate": 9.806332616046726e-06, "loss": 2.3965, "step": 470700 }, { "epoch": 12.06, "learning_rate": 9.797793489770127e-06, "loss": 2.4787, "step": 470800 }, { "epoch": 12.06, "learning_rate": 9.789254363493527e-06, "loss": 2.2388, "step": 470900 }, { "epoch": 12.07, "learning_rate": 9.780715237216929e-06, "loss": 2.1807, "step": 471000 }, { "epoch": 12.07, "learning_rate": 9.772176110940329e-06, "loss": 2.3898, "step": 471100 }, { "epoch": 12.07, "learning_rate": 9.763636984663729e-06, "loss": 2.3237, "step": 471200 }, { "epoch": 12.07, "learning_rate": 9.75509785838713e-06, "loss": 2.3549, "step": 471300 }, { "epoch": 12.08, "learning_rate": 9.74655873211053e-06, "loss": 2.4313, "step": 471400 }, { "epoch": 12.08, "learning_rate": 9.738019605833932e-06, "loss": 2.3672, "step": 471500 }, { "epoch": 12.08, "learning_rate": 9.729480479557331e-06, "loss": 2.4768, "step": 471600 }, { "epoch": 12.08, "learning_rate": 9.720941353280733e-06, "loss": 2.4814, "step": 471700 }, { "epoch": 12.09, "learning_rate": 9.712402227004133e-06, "loss": 2.2807, "step": 471800 }, { "epoch": 12.09, "learning_rate": 9.703863100727534e-06, "loss": 2.4814, "step": 471900 }, { "epoch": 12.09, "learning_rate": 9.695323974450934e-06, "loss": 2.1597, "step": 472000 }, { "epoch": 12.09, "learning_rate": 9.686784848174336e-06, "loss": 2.5317, "step": 472100 }, { "epoch": 12.1, "learning_rate": 9.678245721897736e-06, "loss": 2.3068, "step": 472200 }, { "epoch": 12.1, "learning_rate": 9.669706595621137e-06, "loss": 2.5735, "step": 472300 }, { "epoch": 12.1, "learning_rate": 9.661167469344537e-06, "loss": 2.3386, "step": 472400 }, { "epoch": 12.1, "learning_rate": 9.652628343067937e-06, "loss": 2.1372, "step": 472500 }, { "epoch": 12.11, "learning_rate": 9.644089216791338e-06, "loss": 2.3445, "step": 472600 }, { "epoch": 12.11, "learning_rate": 9.635550090514738e-06, "loss": 2.3872, "step": 472700 }, { "epoch": 12.11, "learning_rate": 9.62701096423814e-06, "loss": 2.3145, "step": 472800 }, { "epoch": 12.11, "learning_rate": 9.61847183796154e-06, "loss": 2.3415, "step": 472900 }, { "epoch": 12.12, "learning_rate": 9.609932711684941e-06, "loss": 2.4201, "step": 473000 }, { "epoch": 12.12, "learning_rate": 9.601393585408341e-06, "loss": 2.4163, "step": 473100 }, { "epoch": 12.12, "learning_rate": 9.592854459131743e-06, "loss": 2.2282, "step": 473200 }, { "epoch": 12.12, "learning_rate": 9.584315332855142e-06, "loss": 2.3337, "step": 473300 }, { "epoch": 12.13, "learning_rate": 9.575776206578544e-06, "loss": 2.5322, "step": 473400 }, { "epoch": 12.13, "learning_rate": 9.567237080301944e-06, "loss": 2.2854, "step": 473500 }, { "epoch": 12.13, "learning_rate": 9.558697954025345e-06, "loss": 2.3463, "step": 473600 }, { "epoch": 12.13, "learning_rate": 9.550158827748745e-06, "loss": 2.1978, "step": 473700 }, { "epoch": 12.14, "learning_rate": 9.541619701472145e-06, "loss": 2.4242, "step": 473800 }, { "epoch": 12.14, "learning_rate": 9.533080575195547e-06, "loss": 2.3271, "step": 473900 }, { "epoch": 12.14, "learning_rate": 9.524541448918947e-06, "loss": 2.3342, "step": 474000 }, { "epoch": 12.15, "learning_rate": 9.516002322642348e-06, "loss": 2.4026, "step": 474100 }, { "epoch": 12.15, "learning_rate": 9.507463196365748e-06, "loss": 2.404, "step": 474200 }, { "epoch": 12.15, "learning_rate": 9.49892407008915e-06, "loss": 2.3344, "step": 474300 }, { "epoch": 12.15, "learning_rate": 9.49038494381255e-06, "loss": 2.4367, "step": 474400 }, { "epoch": 12.16, "learning_rate": 9.481845817535951e-06, "loss": 2.3966, "step": 474500 }, { "epoch": 12.16, "learning_rate": 9.47330669125935e-06, "loss": 2.3476, "step": 474600 }, { "epoch": 12.16, "learning_rate": 9.464767564982752e-06, "loss": 2.2906, "step": 474700 }, { "epoch": 12.16, "learning_rate": 9.456228438706152e-06, "loss": 2.5284, "step": 474800 }, { "epoch": 12.17, "learning_rate": 9.447689312429554e-06, "loss": 2.3994, "step": 474900 }, { "epoch": 12.17, "learning_rate": 9.439150186152954e-06, "loss": 2.2996, "step": 475000 }, { "epoch": 12.17, "learning_rate": 9.430611059876353e-06, "loss": 2.3269, "step": 475100 }, { "epoch": 12.17, "learning_rate": 9.422071933599755e-06, "loss": 2.3843, "step": 475200 }, { "epoch": 12.18, "learning_rate": 9.413532807323155e-06, "loss": 2.4582, "step": 475300 }, { "epoch": 12.18, "learning_rate": 9.404993681046556e-06, "loss": 2.473, "step": 475400 }, { "epoch": 12.18, "learning_rate": 9.396454554769956e-06, "loss": 2.2896, "step": 475500 }, { "epoch": 12.18, "learning_rate": 9.387915428493358e-06, "loss": 2.3914, "step": 475600 }, { "epoch": 12.19, "learning_rate": 9.379376302216758e-06, "loss": 2.3402, "step": 475700 }, { "epoch": 12.19, "learning_rate": 9.370837175940159e-06, "loss": 2.4208, "step": 475800 }, { "epoch": 12.19, "learning_rate": 9.362298049663559e-06, "loss": 2.4028, "step": 475900 }, { "epoch": 12.19, "learning_rate": 9.35375892338696e-06, "loss": 2.3018, "step": 476000 }, { "epoch": 12.2, "learning_rate": 9.34521979711036e-06, "loss": 2.3314, "step": 476100 }, { "epoch": 12.2, "learning_rate": 9.336680670833762e-06, "loss": 2.2372, "step": 476200 }, { "epoch": 12.2, "learning_rate": 9.328141544557162e-06, "loss": 2.267, "step": 476300 }, { "epoch": 12.2, "learning_rate": 9.319602418280562e-06, "loss": 2.2984, "step": 476400 }, { "epoch": 12.21, "learning_rate": 9.311063292003963e-06, "loss": 2.2714, "step": 476500 }, { "epoch": 12.21, "learning_rate": 9.302524165727363e-06, "loss": 2.4087, "step": 476600 }, { "epoch": 12.21, "learning_rate": 9.293985039450765e-06, "loss": 2.3647, "step": 476700 }, { "epoch": 12.21, "learning_rate": 9.285445913174164e-06, "loss": 2.3222, "step": 476800 }, { "epoch": 12.22, "learning_rate": 9.276906786897566e-06, "loss": 2.3455, "step": 476900 }, { "epoch": 12.22, "learning_rate": 9.268367660620966e-06, "loss": 2.2878, "step": 477000 }, { "epoch": 12.22, "learning_rate": 9.259828534344367e-06, "loss": 2.2694, "step": 477100 }, { "epoch": 12.22, "learning_rate": 9.251289408067767e-06, "loss": 2.3443, "step": 477200 }, { "epoch": 12.23, "learning_rate": 9.242750281791169e-06, "loss": 2.2092, "step": 477300 }, { "epoch": 12.23, "learning_rate": 9.234211155514569e-06, "loss": 2.336, "step": 477400 }, { "epoch": 12.23, "learning_rate": 9.22567202923797e-06, "loss": 2.4295, "step": 477500 }, { "epoch": 12.23, "learning_rate": 9.21713290296137e-06, "loss": 2.323, "step": 477600 }, { "epoch": 12.24, "learning_rate": 9.20859377668477e-06, "loss": 2.4561, "step": 477700 }, { "epoch": 12.24, "learning_rate": 9.20005465040817e-06, "loss": 2.2738, "step": 477800 }, { "epoch": 12.24, "learning_rate": 9.191515524131571e-06, "loss": 2.2431, "step": 477900 }, { "epoch": 12.25, "learning_rate": 9.182976397854971e-06, "loss": 2.2159, "step": 478000 }, { "epoch": 12.25, "learning_rate": 9.174437271578373e-06, "loss": 2.4646, "step": 478100 }, { "epoch": 12.25, "learning_rate": 9.165898145301773e-06, "loss": 2.6006, "step": 478200 }, { "epoch": 12.25, "learning_rate": 9.157359019025172e-06, "loss": 2.4189, "step": 478300 }, { "epoch": 12.26, "learning_rate": 9.148819892748574e-06, "loss": 2.4249, "step": 478400 }, { "epoch": 12.26, "learning_rate": 9.140280766471974e-06, "loss": 2.441, "step": 478500 }, { "epoch": 12.26, "learning_rate": 9.131741640195375e-06, "loss": 2.3329, "step": 478600 }, { "epoch": 12.26, "learning_rate": 9.123202513918775e-06, "loss": 2.3692, "step": 478700 }, { "epoch": 12.27, "learning_rate": 9.114663387642177e-06, "loss": 2.2183, "step": 478800 }, { "epoch": 12.27, "learning_rate": 9.106124261365577e-06, "loss": 2.2556, "step": 478900 }, { "epoch": 12.27, "learning_rate": 9.097585135088978e-06, "loss": 2.2597, "step": 479000 }, { "epoch": 12.27, "learning_rate": 9.089046008812378e-06, "loss": 2.2906, "step": 479100 }, { "epoch": 12.28, "learning_rate": 9.08050688253578e-06, "loss": 2.4126, "step": 479200 }, { "epoch": 12.28, "learning_rate": 9.07196775625918e-06, "loss": 2.4707, "step": 479300 }, { "epoch": 12.28, "learning_rate": 9.063428629982581e-06, "loss": 2.2973, "step": 479400 }, { "epoch": 12.28, "learning_rate": 9.05488950370598e-06, "loss": 2.3448, "step": 479500 }, { "epoch": 12.29, "learning_rate": 9.04635037742938e-06, "loss": 2.2431, "step": 479600 }, { "epoch": 12.29, "learning_rate": 9.037811251152782e-06, "loss": 2.1868, "step": 479700 }, { "epoch": 12.29, "learning_rate": 9.029272124876182e-06, "loss": 2.4682, "step": 479800 }, { "epoch": 12.29, "learning_rate": 9.020732998599584e-06, "loss": 2.2371, "step": 479900 }, { "epoch": 12.3, "learning_rate": 9.012193872322983e-06, "loss": 2.3678, "step": 480000 }, { "epoch": 12.3, "learning_rate": 9.003654746046385e-06, "loss": 2.2952, "step": 480100 }, { "epoch": 12.3, "learning_rate": 8.995115619769785e-06, "loss": 2.3105, "step": 480200 }, { "epoch": 12.3, "learning_rate": 8.986576493493186e-06, "loss": 2.2523, "step": 480300 }, { "epoch": 12.31, "learning_rate": 8.978037367216586e-06, "loss": 2.2952, "step": 480400 }, { "epoch": 12.31, "learning_rate": 8.969498240939988e-06, "loss": 2.1216, "step": 480500 }, { "epoch": 12.31, "learning_rate": 8.960959114663388e-06, "loss": 2.2869, "step": 480600 }, { "epoch": 12.31, "learning_rate": 8.95241998838679e-06, "loss": 2.2056, "step": 480700 }, { "epoch": 12.32, "learning_rate": 8.943880862110189e-06, "loss": 2.4429, "step": 480800 }, { "epoch": 12.32, "learning_rate": 8.935341735833589e-06, "loss": 2.4611, "step": 480900 }, { "epoch": 12.32, "learning_rate": 8.92680260955699e-06, "loss": 2.3674, "step": 481000 }, { "epoch": 12.32, "learning_rate": 8.91826348328039e-06, "loss": 2.2883, "step": 481100 }, { "epoch": 12.33, "learning_rate": 8.909724357003792e-06, "loss": 2.3646, "step": 481200 }, { "epoch": 12.33, "learning_rate": 8.901185230727192e-06, "loss": 2.1688, "step": 481300 }, { "epoch": 12.33, "learning_rate": 8.892646104450593e-06, "loss": 2.4568, "step": 481400 }, { "epoch": 12.33, "learning_rate": 8.884106978173993e-06, "loss": 2.3121, "step": 481500 }, { "epoch": 12.34, "learning_rate": 8.875567851897395e-06, "loss": 2.2561, "step": 481600 }, { "epoch": 12.34, "learning_rate": 8.867028725620795e-06, "loss": 2.441, "step": 481700 }, { "epoch": 12.34, "learning_rate": 8.858489599344196e-06, "loss": 2.279, "step": 481800 }, { "epoch": 12.35, "learning_rate": 8.849950473067596e-06, "loss": 2.2011, "step": 481900 }, { "epoch": 12.35, "learning_rate": 8.841411346790998e-06, "loss": 2.4275, "step": 482000 }, { "epoch": 12.35, "learning_rate": 8.832872220514397e-06, "loss": 2.4149, "step": 482100 }, { "epoch": 12.35, "learning_rate": 8.824333094237797e-06, "loss": 2.3443, "step": 482200 }, { "epoch": 12.36, "learning_rate": 8.815793967961199e-06, "loss": 2.4427, "step": 482300 }, { "epoch": 12.36, "learning_rate": 8.807254841684599e-06, "loss": 2.4005, "step": 482400 }, { "epoch": 12.36, "learning_rate": 8.798715715408e-06, "loss": 2.2363, "step": 482500 }, { "epoch": 12.36, "learning_rate": 8.7901765891314e-06, "loss": 2.3844, "step": 482600 }, { "epoch": 12.37, "learning_rate": 8.781637462854802e-06, "loss": 2.2652, "step": 482700 }, { "epoch": 12.37, "learning_rate": 8.773098336578201e-06, "loss": 2.4342, "step": 482800 }, { "epoch": 12.37, "learning_rate": 8.764559210301603e-06, "loss": 2.313, "step": 482900 }, { "epoch": 12.37, "learning_rate": 8.756020084025003e-06, "loss": 2.3698, "step": 483000 }, { "epoch": 12.38, "learning_rate": 8.747480957748404e-06, "loss": 2.2924, "step": 483100 }, { "epoch": 12.38, "learning_rate": 8.738941831471804e-06, "loss": 2.3884, "step": 483200 }, { "epoch": 12.38, "learning_rate": 8.730402705195206e-06, "loss": 2.3257, "step": 483300 }, { "epoch": 12.38, "learning_rate": 8.721863578918606e-06, "loss": 2.315, "step": 483400 }, { "epoch": 12.39, "learning_rate": 8.713324452642005e-06, "loss": 2.4, "step": 483500 }, { "epoch": 12.39, "learning_rate": 8.704785326365407e-06, "loss": 2.4332, "step": 483600 }, { "epoch": 12.39, "learning_rate": 8.696246200088807e-06, "loss": 2.4837, "step": 483700 }, { "epoch": 12.39, "learning_rate": 8.687707073812208e-06, "loss": 2.2764, "step": 483800 }, { "epoch": 12.4, "learning_rate": 8.679167947535608e-06, "loss": 2.3559, "step": 483900 }, { "epoch": 12.4, "learning_rate": 8.67062882125901e-06, "loss": 2.4048, "step": 484000 }, { "epoch": 12.4, "learning_rate": 8.66208969498241e-06, "loss": 2.2568, "step": 484100 }, { "epoch": 12.4, "learning_rate": 8.653550568705811e-06, "loss": 2.1394, "step": 484200 }, { "epoch": 12.41, "learning_rate": 8.645011442429211e-06, "loss": 2.3126, "step": 484300 }, { "epoch": 12.41, "learning_rate": 8.636472316152613e-06, "loss": 2.365, "step": 484400 }, { "epoch": 12.41, "learning_rate": 8.627933189876013e-06, "loss": 2.3888, "step": 484500 }, { "epoch": 12.41, "learning_rate": 8.619394063599414e-06, "loss": 2.309, "step": 484600 }, { "epoch": 12.42, "learning_rate": 8.610854937322814e-06, "loss": 2.3908, "step": 484700 }, { "epoch": 12.42, "learning_rate": 8.602315811046214e-06, "loss": 2.2101, "step": 484800 }, { "epoch": 12.42, "learning_rate": 8.593776684769615e-06, "loss": 2.2009, "step": 484900 }, { "epoch": 12.42, "learning_rate": 8.585237558493015e-06, "loss": 2.4389, "step": 485000 }, { "epoch": 12.43, "learning_rate": 8.576698432216417e-06, "loss": 2.4014, "step": 485100 }, { "epoch": 12.43, "learning_rate": 8.568159305939817e-06, "loss": 2.2933, "step": 485200 }, { "epoch": 12.43, "learning_rate": 8.559620179663218e-06, "loss": 2.2193, "step": 485300 }, { "epoch": 12.43, "learning_rate": 8.551081053386618e-06, "loss": 2.2519, "step": 485400 }, { "epoch": 12.44, "learning_rate": 8.54254192711002e-06, "loss": 2.4025, "step": 485500 }, { "epoch": 12.44, "learning_rate": 8.53400280083342e-06, "loss": 2.4623, "step": 485600 }, { "epoch": 12.44, "learning_rate": 8.525463674556821e-06, "loss": 2.3038, "step": 485700 }, { "epoch": 12.44, "learning_rate": 8.51692454828022e-06, "loss": 2.3208, "step": 485800 }, { "epoch": 12.45, "learning_rate": 8.508385422003622e-06, "loss": 2.3442, "step": 485900 }, { "epoch": 12.45, "learning_rate": 8.499846295727022e-06, "loss": 2.2255, "step": 486000 }, { "epoch": 12.45, "learning_rate": 8.491307169450422e-06, "loss": 2.4155, "step": 486100 }, { "epoch": 12.46, "learning_rate": 8.482768043173824e-06, "loss": 2.3096, "step": 486200 }, { "epoch": 12.46, "learning_rate": 8.474228916897223e-06, "loss": 2.3084, "step": 486300 }, { "epoch": 12.46, "learning_rate": 8.465689790620625e-06, "loss": 2.3511, "step": 486400 }, { "epoch": 12.46, "learning_rate": 8.457150664344025e-06, "loss": 2.2618, "step": 486500 }, { "epoch": 12.47, "learning_rate": 8.448611538067425e-06, "loss": 2.4265, "step": 486600 }, { "epoch": 12.47, "learning_rate": 8.440072411790825e-06, "loss": 2.4689, "step": 486700 }, { "epoch": 12.47, "learning_rate": 8.431533285514226e-06, "loss": 2.4635, "step": 486800 }, { "epoch": 12.47, "learning_rate": 8.422994159237626e-06, "loss": 2.3413, "step": 486900 }, { "epoch": 12.48, "learning_rate": 8.414455032961027e-06, "loss": 2.4025, "step": 487000 }, { "epoch": 12.48, "learning_rate": 8.405915906684427e-06, "loss": 2.3439, "step": 487100 }, { "epoch": 12.48, "learning_rate": 8.397376780407829e-06, "loss": 2.1309, "step": 487200 }, { "epoch": 12.48, "learning_rate": 8.388837654131229e-06, "loss": 2.2746, "step": 487300 }, { "epoch": 12.49, "learning_rate": 8.38029852785463e-06, "loss": 2.2547, "step": 487400 }, { "epoch": 12.49, "learning_rate": 8.37175940157803e-06, "loss": 2.2953, "step": 487500 }, { "epoch": 12.49, "learning_rate": 8.363220275301432e-06, "loss": 2.2432, "step": 487600 }, { "epoch": 12.49, "learning_rate": 8.354681149024832e-06, "loss": 2.3293, "step": 487700 }, { "epoch": 12.5, "learning_rate": 8.346142022748233e-06, "loss": 2.4646, "step": 487800 }, { "epoch": 12.5, "learning_rate": 8.337602896471633e-06, "loss": 2.3454, "step": 487900 }, { "epoch": 12.5, "learning_rate": 8.329063770195033e-06, "loss": 2.3126, "step": 488000 }, { "epoch": 12.5, "learning_rate": 8.320524643918434e-06, "loss": 2.3632, "step": 488100 }, { "epoch": 12.51, "learning_rate": 8.311985517641834e-06, "loss": 2.27, "step": 488200 }, { "epoch": 12.51, "learning_rate": 8.303446391365236e-06, "loss": 2.4339, "step": 488300 }, { "epoch": 12.51, "learning_rate": 8.294907265088636e-06, "loss": 2.3237, "step": 488400 }, { "epoch": 12.51, "learning_rate": 8.286368138812037e-06, "loss": 2.4811, "step": 488500 }, { "epoch": 12.52, "learning_rate": 8.277829012535437e-06, "loss": 2.3788, "step": 488600 }, { "epoch": 12.52, "learning_rate": 8.269289886258839e-06, "loss": 2.3031, "step": 488700 }, { "epoch": 12.52, "learning_rate": 8.260750759982238e-06, "loss": 2.4946, "step": 488800 }, { "epoch": 12.52, "learning_rate": 8.25221163370564e-06, "loss": 2.386, "step": 488900 }, { "epoch": 12.53, "learning_rate": 8.24367250742904e-06, "loss": 2.3604, "step": 489000 }, { "epoch": 12.53, "learning_rate": 8.235133381152441e-06, "loss": 2.3326, "step": 489100 }, { "epoch": 12.53, "learning_rate": 8.226594254875841e-06, "loss": 2.3458, "step": 489200 }, { "epoch": 12.53, "learning_rate": 8.218055128599241e-06, "loss": 2.4537, "step": 489300 }, { "epoch": 12.54, "learning_rate": 8.209516002322643e-06, "loss": 2.445, "step": 489400 }, { "epoch": 12.54, "learning_rate": 8.200976876046042e-06, "loss": 2.3172, "step": 489500 }, { "epoch": 12.54, "learning_rate": 8.192437749769444e-06, "loss": 2.5434, "step": 489600 }, { "epoch": 12.54, "learning_rate": 8.183898623492844e-06, "loss": 2.2746, "step": 489700 }, { "epoch": 12.55, "learning_rate": 8.175359497216245e-06, "loss": 2.5382, "step": 489800 }, { "epoch": 12.55, "learning_rate": 8.166820370939645e-06, "loss": 2.4194, "step": 489900 }, { "epoch": 12.55, "learning_rate": 8.158281244663047e-06, "loss": 2.4621, "step": 490000 }, { "epoch": 12.56, "learning_rate": 8.149742118386447e-06, "loss": 2.1997, "step": 490100 }, { "epoch": 12.56, "learning_rate": 8.141202992109848e-06, "loss": 2.3675, "step": 490200 }, { "epoch": 12.56, "learning_rate": 8.132663865833248e-06, "loss": 2.3056, "step": 490300 }, { "epoch": 12.56, "learning_rate": 8.12412473955665e-06, "loss": 2.359, "step": 490400 }, { "epoch": 12.57, "learning_rate": 8.11558561328005e-06, "loss": 2.2198, "step": 490500 }, { "epoch": 12.57, "learning_rate": 8.10704648700345e-06, "loss": 2.2349, "step": 490600 }, { "epoch": 12.57, "learning_rate": 8.098507360726851e-06, "loss": 2.4303, "step": 490700 }, { "epoch": 12.57, "learning_rate": 8.08996823445025e-06, "loss": 2.4193, "step": 490800 }, { "epoch": 12.58, "learning_rate": 8.081429108173652e-06, "loss": 2.2192, "step": 490900 }, { "epoch": 12.58, "learning_rate": 8.072889981897052e-06, "loss": 2.3109, "step": 491000 }, { "epoch": 12.58, "learning_rate": 8.064350855620454e-06, "loss": 2.3217, "step": 491100 }, { "epoch": 12.58, "learning_rate": 8.055811729343854e-06, "loss": 2.2739, "step": 491200 }, { "epoch": 12.59, "learning_rate": 8.047272603067255e-06, "loss": 2.4991, "step": 491300 }, { "epoch": 12.59, "learning_rate": 8.038733476790655e-06, "loss": 2.234, "step": 491400 }, { "epoch": 12.59, "learning_rate": 8.030194350514057e-06, "loss": 2.27, "step": 491500 }, { "epoch": 12.59, "learning_rate": 8.021655224237456e-06, "loss": 2.4502, "step": 491600 }, { "epoch": 12.6, "learning_rate": 8.013116097960858e-06, "loss": 2.3828, "step": 491700 }, { "epoch": 12.6, "learning_rate": 8.004576971684258e-06, "loss": 2.3753, "step": 491800 }, { "epoch": 12.6, "learning_rate": 7.996037845407658e-06, "loss": 2.5328, "step": 491900 }, { "epoch": 12.6, "learning_rate": 7.98749871913106e-06, "loss": 2.0865, "step": 492000 }, { "epoch": 12.61, "learning_rate": 7.978959592854459e-06, "loss": 2.3216, "step": 492100 }, { "epoch": 12.61, "learning_rate": 7.97042046657786e-06, "loss": 2.1444, "step": 492200 }, { "epoch": 12.61, "learning_rate": 7.96188134030126e-06, "loss": 2.1694, "step": 492300 }, { "epoch": 12.61, "learning_rate": 7.953342214024662e-06, "loss": 2.258, "step": 492400 }, { "epoch": 12.62, "learning_rate": 7.944803087748062e-06, "loss": 2.4035, "step": 492500 }, { "epoch": 12.62, "learning_rate": 7.936263961471463e-06, "loss": 2.3153, "step": 492600 }, { "epoch": 12.62, "learning_rate": 7.927724835194863e-06, "loss": 2.4465, "step": 492700 }, { "epoch": 12.62, "learning_rate": 7.919185708918265e-06, "loss": 2.341, "step": 492800 }, { "epoch": 12.63, "learning_rate": 7.910646582641665e-06, "loss": 2.3799, "step": 492900 }, { "epoch": 12.63, "learning_rate": 7.902107456365066e-06, "loss": 2.2845, "step": 493000 }, { "epoch": 12.63, "learning_rate": 7.893568330088466e-06, "loss": 2.0654, "step": 493100 }, { "epoch": 12.63, "learning_rate": 7.885029203811866e-06, "loss": 2.4565, "step": 493200 }, { "epoch": 12.64, "learning_rate": 7.876490077535267e-06, "loss": 2.3472, "step": 493300 }, { "epoch": 12.64, "learning_rate": 7.867950951258667e-06, "loss": 2.3162, "step": 493400 }, { "epoch": 12.64, "learning_rate": 7.859411824982069e-06, "loss": 2.3647, "step": 493500 }, { "epoch": 12.64, "learning_rate": 7.850872698705469e-06, "loss": 2.4958, "step": 493600 }, { "epoch": 12.65, "learning_rate": 7.84233357242887e-06, "loss": 2.2391, "step": 493700 }, { "epoch": 12.65, "learning_rate": 7.83379444615227e-06, "loss": 2.2725, "step": 493800 }, { "epoch": 12.65, "learning_rate": 7.825255319875672e-06, "loss": 2.3836, "step": 493900 }, { "epoch": 12.65, "learning_rate": 7.816716193599071e-06, "loss": 2.3073, "step": 494000 }, { "epoch": 12.66, "learning_rate": 7.808177067322473e-06, "loss": 2.152, "step": 494100 }, { "epoch": 12.66, "learning_rate": 7.799637941045873e-06, "loss": 2.2982, "step": 494200 }, { "epoch": 12.66, "learning_rate": 7.791098814769274e-06, "loss": 2.4182, "step": 494300 }, { "epoch": 12.67, "learning_rate": 7.782559688492674e-06, "loss": 2.1968, "step": 494400 }, { "epoch": 12.67, "learning_rate": 7.774020562216074e-06, "loss": 2.4608, "step": 494500 }, { "epoch": 12.67, "learning_rate": 7.765481435939476e-06, "loss": 2.3586, "step": 494600 }, { "epoch": 12.67, "learning_rate": 7.756942309662876e-06, "loss": 2.397, "step": 494700 }, { "epoch": 12.68, "learning_rate": 7.748403183386277e-06, "loss": 2.3283, "step": 494800 }, { "epoch": 12.68, "learning_rate": 7.739864057109677e-06, "loss": 2.4132, "step": 494900 }, { "epoch": 12.68, "learning_rate": 7.731324930833079e-06, "loss": 2.4419, "step": 495000 }, { "epoch": 12.68, "learning_rate": 7.722785804556478e-06, "loss": 2.1422, "step": 495100 }, { "epoch": 12.69, "learning_rate": 7.71424667827988e-06, "loss": 2.289, "step": 495200 }, { "epoch": 12.69, "learning_rate": 7.705707552003278e-06, "loss": 2.4988, "step": 495300 }, { "epoch": 12.69, "learning_rate": 7.69716842572668e-06, "loss": 2.2937, "step": 495400 }, { "epoch": 12.69, "learning_rate": 7.68862929945008e-06, "loss": 2.4985, "step": 495500 }, { "epoch": 12.7, "learning_rate": 7.680090173173481e-06, "loss": 2.3567, "step": 495600 }, { "epoch": 12.7, "learning_rate": 7.671551046896881e-06, "loss": 2.1894, "step": 495700 }, { "epoch": 12.7, "learning_rate": 7.663011920620282e-06, "loss": 2.232, "step": 495800 }, { "epoch": 12.7, "learning_rate": 7.654472794343682e-06, "loss": 2.4518, "step": 495900 }, { "epoch": 12.71, "learning_rate": 7.645933668067084e-06, "loss": 2.3342, "step": 496000 }, { "epoch": 12.71, "learning_rate": 7.637394541790484e-06, "loss": 2.3339, "step": 496100 }, { "epoch": 12.71, "learning_rate": 7.628855415513884e-06, "loss": 2.4171, "step": 496200 }, { "epoch": 12.71, "learning_rate": 7.620316289237285e-06, "loss": 2.3569, "step": 496300 }, { "epoch": 12.72, "learning_rate": 7.611777162960686e-06, "loss": 2.2601, "step": 496400 }, { "epoch": 12.72, "learning_rate": 7.6032380366840865e-06, "loss": 2.3, "step": 496500 }, { "epoch": 12.72, "learning_rate": 7.594698910407487e-06, "loss": 2.1885, "step": 496600 }, { "epoch": 12.72, "learning_rate": 7.586159784130888e-06, "loss": 2.4208, "step": 496700 }, { "epoch": 12.73, "learning_rate": 7.577620657854289e-06, "loss": 2.2028, "step": 496800 }, { "epoch": 12.73, "learning_rate": 7.569081531577689e-06, "loss": 2.1802, "step": 496900 }, { "epoch": 12.73, "learning_rate": 7.560542405301089e-06, "loss": 2.4717, "step": 497000 }, { "epoch": 12.73, "learning_rate": 7.55200327902449e-06, "loss": 2.5148, "step": 497100 }, { "epoch": 12.74, "learning_rate": 7.5434641527478905e-06, "loss": 2.3434, "step": 497200 }, { "epoch": 12.74, "learning_rate": 7.534925026471291e-06, "loss": 2.2749, "step": 497300 }, { "epoch": 12.74, "learning_rate": 7.526385900194692e-06, "loss": 2.4476, "step": 497400 }, { "epoch": 12.74, "learning_rate": 7.517846773918093e-06, "loss": 2.4586, "step": 497500 }, { "epoch": 12.75, "learning_rate": 7.509307647641493e-06, "loss": 2.3242, "step": 497600 }, { "epoch": 12.75, "learning_rate": 7.500768521364894e-06, "loss": 2.2898, "step": 497700 }, { "epoch": 12.75, "learning_rate": 7.492229395088295e-06, "loss": 2.1807, "step": 497800 }, { "epoch": 12.75, "learning_rate": 7.4836902688116955e-06, "loss": 2.3854, "step": 497900 }, { "epoch": 12.76, "learning_rate": 7.475151142535096e-06, "loss": 2.3335, "step": 498000 }, { "epoch": 12.76, "learning_rate": 7.466612016258497e-06, "loss": 2.4008, "step": 498100 }, { "epoch": 12.76, "learning_rate": 7.4580728899818976e-06, "loss": 2.3006, "step": 498200 }, { "epoch": 12.77, "learning_rate": 7.449533763705297e-06, "loss": 2.4518, "step": 498300 }, { "epoch": 12.77, "learning_rate": 7.440994637428698e-06, "loss": 2.2865, "step": 498400 }, { "epoch": 12.77, "learning_rate": 7.432455511152099e-06, "loss": 2.4045, "step": 498500 }, { "epoch": 12.77, "learning_rate": 7.4239163848754995e-06, "loss": 2.2694, "step": 498600 }, { "epoch": 12.78, "learning_rate": 7.4153772585989e-06, "loss": 2.3668, "step": 498700 }, { "epoch": 12.78, "learning_rate": 7.406838132322301e-06, "loss": 2.3019, "step": 498800 }, { "epoch": 12.78, "learning_rate": 7.398299006045702e-06, "loss": 2.3908, "step": 498900 }, { "epoch": 12.78, "learning_rate": 7.389759879769102e-06, "loss": 2.3854, "step": 499000 }, { "epoch": 12.79, "learning_rate": 7.381220753492503e-06, "loss": 2.4618, "step": 499100 }, { "epoch": 12.79, "learning_rate": 7.372681627215904e-06, "loss": 2.4981, "step": 499200 }, { "epoch": 12.79, "learning_rate": 7.364142500939304e-06, "loss": 2.2234, "step": 499300 }, { "epoch": 12.79, "learning_rate": 7.355603374662705e-06, "loss": 2.2864, "step": 499400 }, { "epoch": 12.8, "learning_rate": 7.347064248386106e-06, "loss": 2.3078, "step": 499500 }, { "epoch": 12.8, "learning_rate": 7.338525122109506e-06, "loss": 2.311, "step": 499600 }, { "epoch": 12.8, "learning_rate": 7.329985995832906e-06, "loss": 2.3657, "step": 499700 }, { "epoch": 12.8, "learning_rate": 7.321446869556307e-06, "loss": 2.4746, "step": 499800 }, { "epoch": 12.81, "learning_rate": 7.312907743279708e-06, "loss": 2.2827, "step": 499900 }, { "epoch": 12.81, "learning_rate": 7.3043686170031085e-06, "loss": 2.4597, "step": 500000 }, { "epoch": 12.81, "learning_rate": 7.295829490726509e-06, "loss": 2.1647, "step": 500100 }, { "epoch": 12.81, "learning_rate": 7.28729036444991e-06, "loss": 2.3248, "step": 500200 }, { "epoch": 12.82, "learning_rate": 7.278751238173311e-06, "loss": 2.2732, "step": 500300 }, { "epoch": 12.82, "learning_rate": 7.270212111896711e-06, "loss": 2.4343, "step": 500400 }, { "epoch": 12.82, "learning_rate": 7.261672985620112e-06, "loss": 2.388, "step": 500500 }, { "epoch": 12.82, "learning_rate": 7.253133859343513e-06, "loss": 2.2686, "step": 500600 }, { "epoch": 12.83, "learning_rate": 7.244594733066913e-06, "loss": 2.2298, "step": 500700 }, { "epoch": 12.83, "learning_rate": 7.236055606790314e-06, "loss": 2.3278, "step": 500800 }, { "epoch": 12.83, "learning_rate": 7.227516480513715e-06, "loss": 2.3579, "step": 500900 }, { "epoch": 12.83, "learning_rate": 7.218977354237115e-06, "loss": 2.3337, "step": 501000 }, { "epoch": 12.84, "learning_rate": 7.210438227960515e-06, "loss": 2.3084, "step": 501100 }, { "epoch": 12.84, "learning_rate": 7.201899101683916e-06, "loss": 2.3216, "step": 501200 }, { "epoch": 12.84, "learning_rate": 7.193359975407317e-06, "loss": 2.381, "step": 501300 }, { "epoch": 12.84, "learning_rate": 7.1848208491307175e-06, "loss": 2.2307, "step": 501400 }, { "epoch": 12.85, "learning_rate": 7.176281722854118e-06, "loss": 2.3823, "step": 501500 }, { "epoch": 12.85, "learning_rate": 7.167742596577519e-06, "loss": 2.4136, "step": 501600 }, { "epoch": 12.85, "learning_rate": 7.1592034703009196e-06, "loss": 2.4216, "step": 501700 }, { "epoch": 12.85, "learning_rate": 7.15066434402432e-06, "loss": 2.3522, "step": 501800 }, { "epoch": 12.86, "learning_rate": 7.142125217747721e-06, "loss": 2.1623, "step": 501900 }, { "epoch": 12.86, "learning_rate": 7.133586091471122e-06, "loss": 2.313, "step": 502000 }, { "epoch": 12.86, "learning_rate": 7.125046965194522e-06, "loss": 2.2511, "step": 502100 }, { "epoch": 12.87, "learning_rate": 7.116507838917923e-06, "loss": 2.284, "step": 502200 }, { "epoch": 12.87, "learning_rate": 7.107968712641323e-06, "loss": 2.3836, "step": 502300 }, { "epoch": 12.87, "learning_rate": 7.099429586364724e-06, "loss": 2.1526, "step": 502400 }, { "epoch": 12.87, "learning_rate": 7.090890460088124e-06, "loss": 2.3357, "step": 502500 }, { "epoch": 12.88, "learning_rate": 7.082351333811525e-06, "loss": 2.2915, "step": 502600 }, { "epoch": 12.88, "learning_rate": 7.073812207534926e-06, "loss": 2.4807, "step": 502700 }, { "epoch": 12.88, "learning_rate": 7.065273081258326e-06, "loss": 2.4016, "step": 502800 }, { "epoch": 12.88, "learning_rate": 7.056733954981727e-06, "loss": 2.3555, "step": 502900 }, { "epoch": 12.89, "learning_rate": 7.048194828705128e-06, "loss": 2.3149, "step": 503000 }, { "epoch": 12.89, "learning_rate": 7.0396557024285285e-06, "loss": 2.3492, "step": 503100 }, { "epoch": 12.89, "learning_rate": 7.031116576151929e-06, "loss": 2.3255, "step": 503200 }, { "epoch": 12.89, "learning_rate": 7.02257744987533e-06, "loss": 2.3781, "step": 503300 }, { "epoch": 12.9, "learning_rate": 7.014038323598731e-06, "loss": 2.3089, "step": 503400 }, { "epoch": 12.9, "learning_rate": 7.005499197322131e-06, "loss": 2.3618, "step": 503500 }, { "epoch": 12.9, "learning_rate": 6.996960071045531e-06, "loss": 2.2661, "step": 503600 }, { "epoch": 12.9, "learning_rate": 6.988420944768932e-06, "loss": 2.4096, "step": 503700 }, { "epoch": 12.91, "learning_rate": 6.979881818492333e-06, "loss": 2.2042, "step": 503800 }, { "epoch": 12.91, "learning_rate": 6.971342692215733e-06, "loss": 2.3418, "step": 503900 }, { "epoch": 12.91, "learning_rate": 6.962803565939134e-06, "loss": 2.2529, "step": 504000 }, { "epoch": 12.91, "learning_rate": 6.954264439662533e-06, "loss": 2.3809, "step": 504100 }, { "epoch": 12.92, "learning_rate": 6.945725313385934e-06, "loss": 2.3171, "step": 504200 }, { "epoch": 12.92, "learning_rate": 6.937186187109334e-06, "loss": 2.3075, "step": 504300 }, { "epoch": 12.92, "learning_rate": 6.928647060832735e-06, "loss": 2.5179, "step": 504400 }, { "epoch": 12.92, "learning_rate": 6.920107934556136e-06, "loss": 2.3905, "step": 504500 }, { "epoch": 12.93, "learning_rate": 6.9115688082795365e-06, "loss": 2.2362, "step": 504600 }, { "epoch": 12.93, "learning_rate": 6.903029682002937e-06, "loss": 2.4593, "step": 504700 }, { "epoch": 12.93, "learning_rate": 6.894490555726338e-06, "loss": 2.3502, "step": 504800 }, { "epoch": 12.93, "learning_rate": 6.885951429449739e-06, "loss": 2.4884, "step": 504900 }, { "epoch": 12.94, "learning_rate": 6.877412303173139e-06, "loss": 2.3433, "step": 505000 }, { "epoch": 12.94, "learning_rate": 6.86887317689654e-06, "loss": 2.3418, "step": 505100 }, { "epoch": 12.94, "learning_rate": 6.860334050619941e-06, "loss": 2.2611, "step": 505200 }, { "epoch": 12.94, "learning_rate": 6.851794924343341e-06, "loss": 2.4576, "step": 505300 }, { "epoch": 12.95, "learning_rate": 6.843255798066741e-06, "loss": 2.354, "step": 505400 }, { "epoch": 12.95, "learning_rate": 6.834716671790142e-06, "loss": 2.2216, "step": 505500 }, { "epoch": 12.95, "learning_rate": 6.826177545513543e-06, "loss": 2.3089, "step": 505600 }, { "epoch": 12.95, "learning_rate": 6.817638419236943e-06, "loss": 2.405, "step": 505700 }, { "epoch": 12.96, "learning_rate": 6.809099292960344e-06, "loss": 2.2704, "step": 505800 }, { "epoch": 12.96, "learning_rate": 6.800560166683745e-06, "loss": 2.3242, "step": 505900 }, { "epoch": 12.96, "learning_rate": 6.7920210404071455e-06, "loss": 2.2489, "step": 506000 }, { "epoch": 12.96, "learning_rate": 6.783481914130546e-06, "loss": 2.2966, "step": 506100 }, { "epoch": 12.97, "learning_rate": 6.774942787853947e-06, "loss": 2.3083, "step": 506200 }, { "epoch": 12.97, "learning_rate": 6.7664036615773476e-06, "loss": 2.4113, "step": 506300 }, { "epoch": 12.97, "learning_rate": 6.757864535300748e-06, "loss": 2.3892, "step": 506400 }, { "epoch": 12.98, "learning_rate": 6.749325409024149e-06, "loss": 2.2771, "step": 506500 }, { "epoch": 12.98, "learning_rate": 6.74078628274755e-06, "loss": 2.3133, "step": 506600 }, { "epoch": 12.98, "learning_rate": 6.7322471564709495e-06, "loss": 2.3161, "step": 506700 }, { "epoch": 12.98, "learning_rate": 6.72370803019435e-06, "loss": 2.2451, "step": 506800 }, { "epoch": 12.99, "learning_rate": 6.715168903917751e-06, "loss": 2.3036, "step": 506900 }, { "epoch": 12.99, "learning_rate": 6.706629777641152e-06, "loss": 2.3547, "step": 507000 }, { "epoch": 12.99, "learning_rate": 6.698090651364552e-06, "loss": 2.3271, "step": 507100 }, { "epoch": 12.99, "learning_rate": 6.689551525087953e-06, "loss": 2.2541, "step": 507200 }, { "epoch": 13.0, "learning_rate": 6.681012398811354e-06, "loss": 2.2262, "step": 507300 }, { "epoch": 13.0, "learning_rate": 6.6724732725347544e-06, "loss": 2.2099, "step": 507400 }, { "epoch": 13.0, "learning_rate": 6.663934146258155e-06, "loss": 2.285, "step": 507500 }, { "epoch": 13.0, "learning_rate": 6.655395019981556e-06, "loss": 2.2368, "step": 507600 }, { "epoch": 13.01, "learning_rate": 6.6468558937049565e-06, "loss": 2.2398, "step": 507700 }, { "epoch": 13.01, "learning_rate": 6.638316767428357e-06, "loss": 2.3463, "step": 507800 }, { "epoch": 13.01, "learning_rate": 6.629777641151758e-06, "loss": 2.2428, "step": 507900 }, { "epoch": 13.01, "learning_rate": 6.621238514875159e-06, "loss": 2.5164, "step": 508000 }, { "epoch": 13.02, "learning_rate": 6.6126993885985585e-06, "loss": 2.2446, "step": 508100 }, { "epoch": 13.02, "learning_rate": 6.604160262321959e-06, "loss": 2.1762, "step": 508200 }, { "epoch": 13.02, "learning_rate": 6.59562113604536e-06, "loss": 2.3407, "step": 508300 }, { "epoch": 13.02, "learning_rate": 6.587082009768761e-06, "loss": 2.3899, "step": 508400 }, { "epoch": 13.03, "learning_rate": 6.578542883492161e-06, "loss": 2.2486, "step": 508500 }, { "epoch": 13.03, "learning_rate": 6.570003757215562e-06, "loss": 2.2667, "step": 508600 }, { "epoch": 13.03, "learning_rate": 6.561464630938963e-06, "loss": 2.3153, "step": 508700 }, { "epoch": 13.03, "learning_rate": 6.552925504662363e-06, "loss": 2.2504, "step": 508800 }, { "epoch": 13.04, "learning_rate": 6.544386378385764e-06, "loss": 2.3833, "step": 508900 }, { "epoch": 13.04, "learning_rate": 6.535847252109165e-06, "loss": 2.1683, "step": 509000 }, { "epoch": 13.04, "learning_rate": 6.5273081258325655e-06, "loss": 2.2484, "step": 509100 }, { "epoch": 13.04, "learning_rate": 6.518768999555966e-06, "loss": 2.4141, "step": 509200 }, { "epoch": 13.05, "learning_rate": 6.510229873279367e-06, "loss": 2.0949, "step": 509300 }, { "epoch": 13.05, "learning_rate": 6.501690747002767e-06, "loss": 2.3086, "step": 509400 }, { "epoch": 13.05, "learning_rate": 6.4931516207261675e-06, "loss": 2.0418, "step": 509500 }, { "epoch": 13.05, "learning_rate": 6.484612494449568e-06, "loss": 2.2146, "step": 509600 }, { "epoch": 13.06, "learning_rate": 6.476073368172969e-06, "loss": 2.2324, "step": 509700 }, { "epoch": 13.06, "learning_rate": 6.4675342418963696e-06, "loss": 2.4706, "step": 509800 }, { "epoch": 13.06, "learning_rate": 6.45899511561977e-06, "loss": 2.114, "step": 509900 }, { "epoch": 13.06, "learning_rate": 6.450455989343171e-06, "loss": 2.4077, "step": 510000 }, { "epoch": 13.07, "learning_rate": 6.441916863066572e-06, "loss": 2.4391, "step": 510100 }, { "epoch": 13.07, "learning_rate": 6.433377736789972e-06, "loss": 2.2424, "step": 510200 }, { "epoch": 13.07, "learning_rate": 6.424838610513373e-06, "loss": 2.2792, "step": 510300 }, { "epoch": 13.08, "learning_rate": 6.416299484236774e-06, "loss": 2.3093, "step": 510400 }, { "epoch": 13.08, "learning_rate": 6.4077603579601745e-06, "loss": 2.4905, "step": 510500 }, { "epoch": 13.08, "learning_rate": 6.399221231683575e-06, "loss": 2.4747, "step": 510600 }, { "epoch": 13.08, "learning_rate": 6.390682105406975e-06, "loss": 2.35, "step": 510700 }, { "epoch": 13.09, "learning_rate": 6.382142979130376e-06, "loss": 2.2641, "step": 510800 }, { "epoch": 13.09, "learning_rate": 6.3736038528537764e-06, "loss": 2.1705, "step": 510900 }, { "epoch": 13.09, "learning_rate": 6.365064726577177e-06, "loss": 2.203, "step": 511000 }, { "epoch": 13.09, "learning_rate": 6.356525600300578e-06, "loss": 2.3118, "step": 511100 }, { "epoch": 13.1, "learning_rate": 6.3479864740239785e-06, "loss": 2.3299, "step": 511200 }, { "epoch": 13.1, "learning_rate": 6.339447347747379e-06, "loss": 2.3658, "step": 511300 }, { "epoch": 13.1, "learning_rate": 6.33090822147078e-06, "loss": 2.4442, "step": 511400 }, { "epoch": 13.1, "learning_rate": 6.322369095194181e-06, "loss": 2.4042, "step": 511500 }, { "epoch": 13.11, "learning_rate": 6.313829968917581e-06, "loss": 2.4569, "step": 511600 }, { "epoch": 13.11, "learning_rate": 6.305290842640982e-06, "loss": 2.3307, "step": 511700 }, { "epoch": 13.11, "learning_rate": 6.296751716364383e-06, "loss": 2.1856, "step": 511800 }, { "epoch": 13.11, "learning_rate": 6.2882125900877835e-06, "loss": 2.218, "step": 511900 }, { "epoch": 13.12, "learning_rate": 6.279673463811183e-06, "loss": 2.173, "step": 512000 }, { "epoch": 13.12, "learning_rate": 6.271134337534584e-06, "loss": 2.3561, "step": 512100 }, { "epoch": 13.12, "learning_rate": 6.262595211257985e-06, "loss": 2.3229, "step": 512200 }, { "epoch": 13.12, "learning_rate": 6.254056084981385e-06, "loss": 2.2665, "step": 512300 }, { "epoch": 13.13, "learning_rate": 6.245516958704785e-06, "loss": 2.2048, "step": 512400 }, { "epoch": 13.13, "learning_rate": 6.236977832428186e-06, "loss": 2.1869, "step": 512500 }, { "epoch": 13.13, "learning_rate": 6.228438706151587e-06, "loss": 2.2518, "step": 512600 }, { "epoch": 13.13, "learning_rate": 6.219899579874987e-06, "loss": 2.3848, "step": 512700 }, { "epoch": 13.14, "learning_rate": 6.211360453598388e-06, "loss": 2.2685, "step": 512800 }, { "epoch": 13.14, "learning_rate": 6.202821327321789e-06, "loss": 2.2747, "step": 512900 }, { "epoch": 13.14, "learning_rate": 6.1942822010451895e-06, "loss": 2.2388, "step": 513000 }, { "epoch": 13.14, "learning_rate": 6.18574307476859e-06, "loss": 2.2014, "step": 513100 }, { "epoch": 13.15, "learning_rate": 6.177203948491991e-06, "loss": 2.2756, "step": 513200 }, { "epoch": 13.15, "learning_rate": 6.1686648222153916e-06, "loss": 2.4186, "step": 513300 }, { "epoch": 13.15, "learning_rate": 6.160125695938792e-06, "loss": 2.4534, "step": 513400 }, { "epoch": 13.15, "learning_rate": 6.151586569662193e-06, "loss": 2.3178, "step": 513500 }, { "epoch": 13.16, "learning_rate": 6.143047443385593e-06, "loss": 2.1683, "step": 513600 }, { "epoch": 13.16, "learning_rate": 6.1345083171089935e-06, "loss": 2.4042, "step": 513700 }, { "epoch": 13.16, "learning_rate": 6.125969190832394e-06, "loss": 2.3706, "step": 513800 }, { "epoch": 13.16, "learning_rate": 6.117430064555795e-06, "loss": 2.3816, "step": 513900 }, { "epoch": 13.17, "learning_rate": 6.108890938279196e-06, "loss": 2.3901, "step": 514000 }, { "epoch": 13.17, "learning_rate": 6.100351812002596e-06, "loss": 2.4142, "step": 514100 }, { "epoch": 13.17, "learning_rate": 6.091812685725997e-06, "loss": 2.3527, "step": 514200 }, { "epoch": 13.18, "learning_rate": 6.083273559449398e-06, "loss": 2.2065, "step": 514300 }, { "epoch": 13.18, "learning_rate": 6.0747344331727984e-06, "loss": 2.2061, "step": 514400 }, { "epoch": 13.18, "learning_rate": 6.066195306896199e-06, "loss": 2.392, "step": 514500 }, { "epoch": 13.18, "learning_rate": 6.0576561806196e-06, "loss": 2.3672, "step": 514600 }, { "epoch": 13.19, "learning_rate": 6.0491170543430005e-06, "loss": 2.4435, "step": 514700 }, { "epoch": 13.19, "learning_rate": 6.040577928066401e-06, "loss": 2.3277, "step": 514800 }, { "epoch": 13.19, "learning_rate": 6.032038801789801e-06, "loss": 2.2059, "step": 514900 }, { "epoch": 13.19, "learning_rate": 6.023499675513202e-06, "loss": 2.3794, "step": 515000 }, { "epoch": 13.2, "learning_rate": 6.014960549236602e-06, "loss": 2.3631, "step": 515100 }, { "epoch": 13.2, "learning_rate": 6.006421422960002e-06, "loss": 2.2747, "step": 515200 }, { "epoch": 13.2, "learning_rate": 5.997882296683403e-06, "loss": 2.2276, "step": 515300 }, { "epoch": 13.2, "learning_rate": 5.989343170406804e-06, "loss": 2.3502, "step": 515400 }, { "epoch": 13.21, "learning_rate": 5.9808040441302045e-06, "loss": 2.2641, "step": 515500 }, { "epoch": 13.21, "learning_rate": 5.972264917853605e-06, "loss": 2.4032, "step": 515600 }, { "epoch": 13.21, "learning_rate": 5.963725791577006e-06, "loss": 2.2596, "step": 515700 }, { "epoch": 13.21, "learning_rate": 5.9551866653004066e-06, "loss": 2.2879, "step": 515800 }, { "epoch": 13.22, "learning_rate": 5.946647539023807e-06, "loss": 2.2841, "step": 515900 }, { "epoch": 13.22, "learning_rate": 5.938108412747208e-06, "loss": 2.23, "step": 516000 }, { "epoch": 13.22, "learning_rate": 5.929569286470609e-06, "loss": 2.4033, "step": 516100 }, { "epoch": 13.22, "learning_rate": 5.921030160194009e-06, "loss": 2.1437, "step": 516200 }, { "epoch": 13.23, "learning_rate": 5.91249103391741e-06, "loss": 2.2891, "step": 516300 }, { "epoch": 13.23, "learning_rate": 5.903951907640811e-06, "loss": 2.1633, "step": 516400 }, { "epoch": 13.23, "learning_rate": 5.895412781364211e-06, "loss": 2.2754, "step": 516500 }, { "epoch": 13.23, "learning_rate": 5.886873655087611e-06, "loss": 2.3523, "step": 516600 }, { "epoch": 13.24, "learning_rate": 5.878334528811012e-06, "loss": 2.2518, "step": 516700 }, { "epoch": 13.24, "learning_rate": 5.869795402534413e-06, "loss": 2.3984, "step": 516800 }, { "epoch": 13.24, "learning_rate": 5.8612562762578134e-06, "loss": 2.3942, "step": 516900 }, { "epoch": 13.24, "learning_rate": 5.852717149981214e-06, "loss": 2.3366, "step": 517000 }, { "epoch": 13.25, "learning_rate": 5.844178023704615e-06, "loss": 2.2217, "step": 517100 }, { "epoch": 13.25, "learning_rate": 5.8356388974280155e-06, "loss": 2.3396, "step": 517200 }, { "epoch": 13.25, "learning_rate": 5.827099771151416e-06, "loss": 2.2669, "step": 517300 }, { "epoch": 13.25, "learning_rate": 5.818560644874817e-06, "loss": 2.3906, "step": 517400 }, { "epoch": 13.26, "learning_rate": 5.810021518598218e-06, "loss": 2.3159, "step": 517500 }, { "epoch": 13.26, "learning_rate": 5.801482392321618e-06, "loss": 2.5124, "step": 517600 }, { "epoch": 13.26, "learning_rate": 5.792943266045019e-06, "loss": 2.2749, "step": 517700 }, { "epoch": 13.26, "learning_rate": 5.784404139768419e-06, "loss": 2.417, "step": 517800 }, { "epoch": 13.27, "learning_rate": 5.77586501349182e-06, "loss": 2.4673, "step": 517900 }, { "epoch": 13.27, "learning_rate": 5.76732588721522e-06, "loss": 2.343, "step": 518000 }, { "epoch": 13.27, "learning_rate": 5.758786760938621e-06, "loss": 2.3252, "step": 518100 }, { "epoch": 13.27, "learning_rate": 5.750247634662022e-06, "loss": 2.5362, "step": 518200 }, { "epoch": 13.28, "learning_rate": 5.741708508385422e-06, "loss": 2.3327, "step": 518300 }, { "epoch": 13.28, "learning_rate": 5.733169382108823e-06, "loss": 2.347, "step": 518400 }, { "epoch": 13.28, "learning_rate": 5.724630255832224e-06, "loss": 2.3024, "step": 518500 }, { "epoch": 13.29, "learning_rate": 5.7160911295556245e-06, "loss": 2.278, "step": 518600 }, { "epoch": 13.29, "learning_rate": 5.707552003279025e-06, "loss": 2.3998, "step": 518700 }, { "epoch": 13.29, "learning_rate": 5.699012877002426e-06, "loss": 2.2746, "step": 518800 }, { "epoch": 13.29, "learning_rate": 5.690473750725827e-06, "loss": 2.2974, "step": 518900 }, { "epoch": 13.3, "learning_rate": 5.681934624449227e-06, "loss": 2.3175, "step": 519000 }, { "epoch": 13.3, "learning_rate": 5.673395498172627e-06, "loss": 2.3615, "step": 519100 }, { "epoch": 13.3, "learning_rate": 5.664856371896028e-06, "loss": 2.2808, "step": 519200 }, { "epoch": 13.3, "learning_rate": 5.6563172456194286e-06, "loss": 2.4497, "step": 519300 }, { "epoch": 13.31, "learning_rate": 5.647778119342828e-06, "loss": 2.4673, "step": 519400 }, { "epoch": 13.31, "learning_rate": 5.639238993066229e-06, "loss": 2.1913, "step": 519500 }, { "epoch": 13.31, "learning_rate": 5.63069986678963e-06, "loss": 2.1462, "step": 519600 }, { "epoch": 13.31, "learning_rate": 5.6221607405130305e-06, "loss": 2.3437, "step": 519700 }, { "epoch": 13.32, "learning_rate": 5.613621614236431e-06, "loss": 2.2628, "step": 519800 }, { "epoch": 13.32, "learning_rate": 5.605082487959832e-06, "loss": 2.1694, "step": 519900 }, { "epoch": 13.32, "learning_rate": 5.596543361683233e-06, "loss": 2.3273, "step": 520000 }, { "epoch": 13.32, "learning_rate": 5.588004235406633e-06, "loss": 2.2511, "step": 520100 }, { "epoch": 13.33, "learning_rate": 5.579465109130034e-06, "loss": 2.3144, "step": 520200 }, { "epoch": 13.33, "learning_rate": 5.570925982853435e-06, "loss": 2.3039, "step": 520300 }, { "epoch": 13.33, "learning_rate": 5.5623868565768354e-06, "loss": 2.286, "step": 520400 }, { "epoch": 13.33, "learning_rate": 5.553847730300236e-06, "loss": 2.2385, "step": 520500 }, { "epoch": 13.34, "learning_rate": 5.545308604023637e-06, "loss": 2.127, "step": 520600 }, { "epoch": 13.34, "learning_rate": 5.536769477747037e-06, "loss": 2.3676, "step": 520700 }, { "epoch": 13.34, "learning_rate": 5.528230351470437e-06, "loss": 2.3496, "step": 520800 }, { "epoch": 13.34, "learning_rate": 5.519691225193838e-06, "loss": 2.3599, "step": 520900 }, { "epoch": 13.35, "learning_rate": 5.511152098917239e-06, "loss": 2.1873, "step": 521000 }, { "epoch": 13.35, "learning_rate": 5.5026129726406395e-06, "loss": 2.292, "step": 521100 }, { "epoch": 13.35, "learning_rate": 5.49407384636404e-06, "loss": 2.2371, "step": 521200 }, { "epoch": 13.35, "learning_rate": 5.485534720087441e-06, "loss": 2.3498, "step": 521300 }, { "epoch": 13.36, "learning_rate": 5.476995593810842e-06, "loss": 2.2792, "step": 521400 }, { "epoch": 13.36, "learning_rate": 5.468456467534242e-06, "loss": 2.2241, "step": 521500 }, { "epoch": 13.36, "learning_rate": 5.459917341257643e-06, "loss": 2.3171, "step": 521600 }, { "epoch": 13.36, "learning_rate": 5.451378214981044e-06, "loss": 2.3056, "step": 521700 }, { "epoch": 13.37, "learning_rate": 5.442839088704444e-06, "loss": 2.1791, "step": 521800 }, { "epoch": 13.37, "learning_rate": 5.434299962427845e-06, "loss": 2.2324, "step": 521900 }, { "epoch": 13.37, "learning_rate": 5.425760836151245e-06, "loss": 2.2704, "step": 522000 }, { "epoch": 13.37, "learning_rate": 5.417221709874646e-06, "loss": 2.3341, "step": 522100 }, { "epoch": 13.38, "learning_rate": 5.408682583598046e-06, "loss": 2.1766, "step": 522200 }, { "epoch": 13.38, "learning_rate": 5.400143457321447e-06, "loss": 2.1573, "step": 522300 }, { "epoch": 13.38, "learning_rate": 5.391604331044848e-06, "loss": 2.2716, "step": 522400 }, { "epoch": 13.39, "learning_rate": 5.3830652047682485e-06, "loss": 2.2186, "step": 522500 }, { "epoch": 13.39, "learning_rate": 5.374526078491649e-06, "loss": 2.4815, "step": 522600 }, { "epoch": 13.39, "learning_rate": 5.36598695221505e-06, "loss": 2.5104, "step": 522700 }, { "epoch": 13.39, "learning_rate": 5.3574478259384506e-06, "loss": 2.3255, "step": 522800 }, { "epoch": 13.4, "learning_rate": 5.348908699661851e-06, "loss": 2.3178, "step": 522900 }, { "epoch": 13.4, "learning_rate": 5.340369573385252e-06, "loss": 2.2776, "step": 523000 }, { "epoch": 13.4, "learning_rate": 5.331830447108653e-06, "loss": 2.1851, "step": 523100 }, { "epoch": 13.4, "learning_rate": 5.323291320832053e-06, "loss": 2.4296, "step": 523200 }, { "epoch": 13.41, "learning_rate": 5.314752194555453e-06, "loss": 2.2819, "step": 523300 }, { "epoch": 13.41, "learning_rate": 5.306213068278854e-06, "loss": 2.322, "step": 523400 }, { "epoch": 13.41, "learning_rate": 5.297673942002255e-06, "loss": 2.1871, "step": 523500 }, { "epoch": 13.41, "learning_rate": 5.289134815725655e-06, "loss": 2.382, "step": 523600 }, { "epoch": 13.42, "learning_rate": 5.280595689449056e-06, "loss": 2.51, "step": 523700 }, { "epoch": 13.42, "learning_rate": 5.272056563172456e-06, "loss": 2.5047, "step": 523800 }, { "epoch": 13.42, "learning_rate": 5.263517436895857e-06, "loss": 2.1496, "step": 523900 }, { "epoch": 13.42, "learning_rate": 5.254978310619257e-06, "loss": 2.3528, "step": 524000 }, { "epoch": 13.43, "learning_rate": 5.246439184342658e-06, "loss": 2.23, "step": 524100 }, { "epoch": 13.43, "learning_rate": 5.237900058066059e-06, "loss": 2.2, "step": 524200 }, { "epoch": 13.43, "learning_rate": 5.229360931789459e-06, "loss": 2.4509, "step": 524300 }, { "epoch": 13.43, "learning_rate": 5.22082180551286e-06, "loss": 2.218, "step": 524400 }, { "epoch": 13.44, "learning_rate": 5.212282679236261e-06, "loss": 2.1552, "step": 524500 }, { "epoch": 13.44, "learning_rate": 5.2037435529596615e-06, "loss": 2.4082, "step": 524600 }, { "epoch": 13.44, "learning_rate": 5.195204426683062e-06, "loss": 2.1625, "step": 524700 }, { "epoch": 13.44, "learning_rate": 5.186665300406463e-06, "loss": 2.3216, "step": 524800 }, { "epoch": 13.45, "learning_rate": 5.178126174129863e-06, "loss": 2.2809, "step": 524900 }, { "epoch": 13.45, "learning_rate": 5.1695870478532634e-06, "loss": 2.185, "step": 525000 }, { "epoch": 13.45, "learning_rate": 5.161047921576664e-06, "loss": 2.2444, "step": 525100 }, { "epoch": 13.45, "learning_rate": 5.152508795300065e-06, "loss": 2.3672, "step": 525200 }, { "epoch": 13.46, "learning_rate": 5.1439696690234656e-06, "loss": 2.2422, "step": 525300 }, { "epoch": 13.46, "learning_rate": 5.135430542746866e-06, "loss": 2.1887, "step": 525400 }, { "epoch": 13.46, "learning_rate": 5.126891416470267e-06, "loss": 2.2537, "step": 525500 }, { "epoch": 13.46, "learning_rate": 5.118352290193668e-06, "loss": 2.4022, "step": 525600 }, { "epoch": 13.47, "learning_rate": 5.109813163917068e-06, "loss": 2.3139, "step": 525700 }, { "epoch": 13.47, "learning_rate": 5.101274037640469e-06, "loss": 2.2153, "step": 525800 }, { "epoch": 13.47, "learning_rate": 5.09273491136387e-06, "loss": 2.3421, "step": 525900 }, { "epoch": 13.47, "learning_rate": 5.0841957850872705e-06, "loss": 2.3735, "step": 526000 }, { "epoch": 13.48, "learning_rate": 5.075656658810671e-06, "loss": 2.3653, "step": 526100 }, { "epoch": 13.48, "learning_rate": 5.067117532534071e-06, "loss": 2.3226, "step": 526200 }, { "epoch": 13.48, "learning_rate": 5.058578406257472e-06, "loss": 2.2424, "step": 526300 }, { "epoch": 13.48, "learning_rate": 5.050039279980872e-06, "loss": 2.319, "step": 526400 }, { "epoch": 13.49, "learning_rate": 5.041500153704273e-06, "loss": 2.1001, "step": 526500 }, { "epoch": 13.49, "learning_rate": 5.032961027427674e-06, "loss": 2.3242, "step": 526600 }, { "epoch": 13.49, "learning_rate": 5.0244219011510745e-06, "loss": 2.1981, "step": 526700 }, { "epoch": 13.5, "learning_rate": 5.015882774874475e-06, "loss": 2.3837, "step": 526800 }, { "epoch": 13.5, "learning_rate": 5.007343648597876e-06, "loss": 2.2706, "step": 526900 }, { "epoch": 13.5, "learning_rate": 4.998804522321277e-06, "loss": 2.3966, "step": 527000 }, { "epoch": 13.5, "learning_rate": 4.990265396044677e-06, "loss": 2.1872, "step": 527100 }, { "epoch": 13.51, "learning_rate": 4.981726269768078e-06, "loss": 2.3085, "step": 527200 }, { "epoch": 13.51, "learning_rate": 4.973187143491479e-06, "loss": 2.1895, "step": 527300 }, { "epoch": 13.51, "learning_rate": 4.9646480172148794e-06, "loss": 2.2384, "step": 527400 }, { "epoch": 13.51, "learning_rate": 4.956108890938279e-06, "loss": 2.3372, "step": 527500 }, { "epoch": 13.52, "learning_rate": 4.94756976466168e-06, "loss": 2.2382, "step": 527600 }, { "epoch": 13.52, "learning_rate": 4.939030638385081e-06, "loss": 2.4529, "step": 527700 }, { "epoch": 13.52, "learning_rate": 4.930491512108481e-06, "loss": 2.2532, "step": 527800 }, { "epoch": 13.52, "learning_rate": 4.921952385831882e-06, "loss": 2.386, "step": 527900 }, { "epoch": 13.53, "learning_rate": 4.913413259555283e-06, "loss": 2.3378, "step": 528000 }, { "epoch": 13.53, "learning_rate": 4.9048741332786835e-06, "loss": 2.4536, "step": 528100 }, { "epoch": 13.53, "learning_rate": 4.896335007002083e-06, "loss": 2.3369, "step": 528200 }, { "epoch": 13.53, "learning_rate": 4.887795880725484e-06, "loss": 2.4021, "step": 528300 }, { "epoch": 13.54, "learning_rate": 4.879256754448885e-06, "loss": 2.283, "step": 528400 }, { "epoch": 13.54, "learning_rate": 4.8707176281722854e-06, "loss": 2.2235, "step": 528500 }, { "epoch": 13.54, "learning_rate": 4.862178501895686e-06, "loss": 2.2042, "step": 528600 }, { "epoch": 13.54, "learning_rate": 4.853639375619087e-06, "loss": 2.1249, "step": 528700 }, { "epoch": 13.55, "learning_rate": 4.8451002493424876e-06, "loss": 2.469, "step": 528800 }, { "epoch": 13.55, "learning_rate": 4.836561123065888e-06, "loss": 2.3044, "step": 528900 }, { "epoch": 13.55, "learning_rate": 4.828021996789289e-06, "loss": 2.4205, "step": 529000 }, { "epoch": 13.55, "learning_rate": 4.819482870512689e-06, "loss": 2.3678, "step": 529100 }, { "epoch": 13.56, "learning_rate": 4.8109437442360895e-06, "loss": 2.2873, "step": 529200 }, { "epoch": 13.56, "learning_rate": 4.80240461795949e-06, "loss": 2.2397, "step": 529300 }, { "epoch": 13.56, "learning_rate": 4.793865491682891e-06, "loss": 2.2666, "step": 529400 }, { "epoch": 13.56, "learning_rate": 4.785326365406292e-06, "loss": 2.1234, "step": 529500 }, { "epoch": 13.57, "learning_rate": 4.776787239129692e-06, "loss": 2.297, "step": 529600 }, { "epoch": 13.57, "learning_rate": 4.768248112853093e-06, "loss": 2.2197, "step": 529700 }, { "epoch": 13.57, "learning_rate": 4.759708986576494e-06, "loss": 2.3912, "step": 529800 }, { "epoch": 13.57, "learning_rate": 4.751169860299894e-06, "loss": 2.2514, "step": 529900 }, { "epoch": 13.58, "learning_rate": 4.742630734023295e-06, "loss": 2.2146, "step": 530000 }, { "epoch": 13.58, "learning_rate": 4.734091607746696e-06, "loss": 2.2581, "step": 530100 }, { "epoch": 13.58, "learning_rate": 4.7255524814700965e-06, "loss": 2.2877, "step": 530200 }, { "epoch": 13.58, "learning_rate": 4.717013355193497e-06, "loss": 2.277, "step": 530300 }, { "epoch": 13.59, "learning_rate": 4.708474228916897e-06, "loss": 2.1507, "step": 530400 }, { "epoch": 13.59, "learning_rate": 4.699935102640298e-06, "loss": 2.3655, "step": 530500 }, { "epoch": 13.59, "learning_rate": 4.6913959763636985e-06, "loss": 2.3029, "step": 530600 }, { "epoch": 13.6, "learning_rate": 4.682856850087099e-06, "loss": 2.3902, "step": 530700 }, { "epoch": 13.6, "learning_rate": 4.6743177238105e-06, "loss": 2.4023, "step": 530800 }, { "epoch": 13.6, "learning_rate": 4.665778597533901e-06, "loss": 2.3543, "step": 530900 }, { "epoch": 13.6, "learning_rate": 4.657239471257301e-06, "loss": 2.4241, "step": 531000 }, { "epoch": 13.61, "learning_rate": 4.648700344980702e-06, "loss": 2.4799, "step": 531100 }, { "epoch": 13.61, "learning_rate": 4.640161218704103e-06, "loss": 2.2627, "step": 531200 }, { "epoch": 13.61, "learning_rate": 4.631622092427503e-06, "loss": 2.2909, "step": 531300 }, { "epoch": 13.61, "learning_rate": 4.623082966150904e-06, "loss": 2.3981, "step": 531400 }, { "epoch": 13.62, "learning_rate": 4.614543839874305e-06, "loss": 2.2313, "step": 531500 }, { "epoch": 13.62, "learning_rate": 4.6060047135977055e-06, "loss": 2.2898, "step": 531600 }, { "epoch": 13.62, "learning_rate": 4.597465587321105e-06, "loss": 2.0953, "step": 531700 }, { "epoch": 13.62, "learning_rate": 4.588926461044506e-06, "loss": 2.3292, "step": 531800 }, { "epoch": 13.63, "learning_rate": 4.580387334767907e-06, "loss": 2.2417, "step": 531900 }, { "epoch": 13.63, "learning_rate": 4.5718482084913074e-06, "loss": 2.2797, "step": 532000 }, { "epoch": 13.63, "learning_rate": 4.563309082214708e-06, "loss": 2.4571, "step": 532100 }, { "epoch": 13.63, "learning_rate": 4.554769955938109e-06, "loss": 2.3237, "step": 532200 }, { "epoch": 13.64, "learning_rate": 4.5462308296615096e-06, "loss": 2.2959, "step": 532300 }, { "epoch": 13.64, "learning_rate": 4.53769170338491e-06, "loss": 2.474, "step": 532400 }, { "epoch": 13.64, "learning_rate": 4.529152577108311e-06, "loss": 2.2593, "step": 532500 }, { "epoch": 13.64, "learning_rate": 4.520613450831711e-06, "loss": 2.4216, "step": 532600 }, { "epoch": 13.65, "learning_rate": 4.5120743245551115e-06, "loss": 2.4381, "step": 532700 }, { "epoch": 13.65, "learning_rate": 4.503535198278512e-06, "loss": 2.3255, "step": 532800 }, { "epoch": 13.65, "learning_rate": 4.494996072001913e-06, "loss": 2.2518, "step": 532900 }, { "epoch": 13.65, "learning_rate": 4.486456945725314e-06, "loss": 2.1913, "step": 533000 }, { "epoch": 13.66, "learning_rate": 4.477917819448714e-06, "loss": 2.2473, "step": 533100 }, { "epoch": 13.66, "learning_rate": 4.469378693172115e-06, "loss": 2.4085, "step": 533200 }, { "epoch": 13.66, "learning_rate": 4.460839566895515e-06, "loss": 2.2575, "step": 533300 }, { "epoch": 13.66, "learning_rate": 4.4523004406189156e-06, "loss": 2.4073, "step": 533400 }, { "epoch": 13.67, "learning_rate": 4.443761314342316e-06, "loss": 2.356, "step": 533500 }, { "epoch": 13.67, "learning_rate": 4.435222188065717e-06, "loss": 2.306, "step": 533600 }, { "epoch": 13.67, "learning_rate": 4.426683061789118e-06, "loss": 2.1347, "step": 533700 }, { "epoch": 13.67, "learning_rate": 4.418143935512518e-06, "loss": 2.2145, "step": 533800 }, { "epoch": 13.68, "learning_rate": 4.409604809235919e-06, "loss": 2.2796, "step": 533900 }, { "epoch": 13.68, "learning_rate": 4.40106568295932e-06, "loss": 2.3241, "step": 534000 }, { "epoch": 13.68, "learning_rate": 4.3925265566827205e-06, "loss": 2.2354, "step": 534100 }, { "epoch": 13.68, "learning_rate": 4.383987430406121e-06, "loss": 2.1824, "step": 534200 }, { "epoch": 13.69, "learning_rate": 4.375448304129522e-06, "loss": 2.35, "step": 534300 }, { "epoch": 13.69, "learning_rate": 4.366909177852923e-06, "loss": 2.3045, "step": 534400 }, { "epoch": 13.69, "learning_rate": 4.358370051576323e-06, "loss": 2.3268, "step": 534500 }, { "epoch": 13.7, "learning_rate": 4.349830925299723e-06, "loss": 2.2335, "step": 534600 }, { "epoch": 13.7, "learning_rate": 4.341291799023124e-06, "loss": 2.2382, "step": 534700 }, { "epoch": 13.7, "learning_rate": 4.3327526727465245e-06, "loss": 2.3745, "step": 534800 }, { "epoch": 13.7, "learning_rate": 4.324213546469925e-06, "loss": 2.4012, "step": 534900 }, { "epoch": 13.71, "learning_rate": 4.315674420193326e-06, "loss": 2.3895, "step": 535000 }, { "epoch": 13.71, "learning_rate": 4.307135293916727e-06, "loss": 2.276, "step": 535100 }, { "epoch": 13.71, "learning_rate": 4.298596167640127e-06, "loss": 2.3365, "step": 535200 }, { "epoch": 13.71, "learning_rate": 4.290057041363528e-06, "loss": 2.1478, "step": 535300 }, { "epoch": 13.72, "learning_rate": 4.281517915086929e-06, "loss": 2.2136, "step": 535400 }, { "epoch": 13.72, "learning_rate": 4.2729787888103294e-06, "loss": 2.1603, "step": 535500 }, { "epoch": 13.72, "learning_rate": 4.26443966253373e-06, "loss": 2.2522, "step": 535600 }, { "epoch": 13.72, "learning_rate": 4.255900536257131e-06, "loss": 2.3462, "step": 535700 }, { "epoch": 13.73, "learning_rate": 4.2473614099805316e-06, "loss": 2.4739, "step": 535800 }, { "epoch": 13.73, "learning_rate": 4.238822283703931e-06, "loss": 2.2846, "step": 535900 }, { "epoch": 13.73, "learning_rate": 4.230283157427332e-06, "loss": 2.3267, "step": 536000 }, { "epoch": 13.73, "learning_rate": 4.221744031150733e-06, "loss": 2.2946, "step": 536100 }, { "epoch": 13.74, "learning_rate": 4.2132049048741335e-06, "loss": 2.1301, "step": 536200 }, { "epoch": 13.74, "learning_rate": 4.204665778597534e-06, "loss": 2.4257, "step": 536300 }, { "epoch": 13.74, "learning_rate": 4.196126652320935e-06, "loss": 2.2667, "step": 536400 }, { "epoch": 13.74, "learning_rate": 4.187587526044336e-06, "loss": 2.2935, "step": 536500 }, { "epoch": 13.75, "learning_rate": 4.179048399767736e-06, "loss": 2.4029, "step": 536600 }, { "epoch": 13.75, "learning_rate": 4.170509273491137e-06, "loss": 2.204, "step": 536700 }, { "epoch": 13.75, "learning_rate": 4.161970147214538e-06, "loss": 2.4656, "step": 536800 }, { "epoch": 13.75, "learning_rate": 4.1534310209379376e-06, "loss": 2.3937, "step": 536900 }, { "epoch": 13.76, "learning_rate": 4.144891894661338e-06, "loss": 2.2908, "step": 537000 }, { "epoch": 13.76, "learning_rate": 4.136352768384739e-06, "loss": 2.2052, "step": 537100 }, { "epoch": 13.76, "learning_rate": 4.12781364210814e-06, "loss": 2.3669, "step": 537200 }, { "epoch": 13.76, "learning_rate": 4.11927451583154e-06, "loss": 2.2515, "step": 537300 }, { "epoch": 13.77, "learning_rate": 4.110735389554941e-06, "loss": 2.2041, "step": 537400 }, { "epoch": 13.77, "learning_rate": 4.102196263278341e-06, "loss": 2.2503, "step": 537500 }, { "epoch": 13.77, "learning_rate": 4.093657137001742e-06, "loss": 2.3934, "step": 537600 }, { "epoch": 13.77, "learning_rate": 4.085118010725142e-06, "loss": 2.3733, "step": 537700 }, { "epoch": 13.78, "learning_rate": 4.076578884448543e-06, "loss": 2.3535, "step": 537800 }, { "epoch": 13.78, "learning_rate": 4.068039758171944e-06, "loss": 2.1226, "step": 537900 }, { "epoch": 13.78, "learning_rate": 4.0595006318953444e-06, "loss": 2.3963, "step": 538000 }, { "epoch": 13.78, "learning_rate": 4.050961505618745e-06, "loss": 2.3109, "step": 538100 }, { "epoch": 13.79, "learning_rate": 4.042422379342146e-06, "loss": 2.2519, "step": 538200 }, { "epoch": 13.79, "learning_rate": 4.0338832530655465e-06, "loss": 2.2264, "step": 538300 }, { "epoch": 13.79, "learning_rate": 4.025344126788947e-06, "loss": 2.3696, "step": 538400 }, { "epoch": 13.79, "learning_rate": 4.016805000512348e-06, "loss": 2.3853, "step": 538500 }, { "epoch": 13.8, "learning_rate": 4.008265874235749e-06, "loss": 2.243, "step": 538600 }, { "epoch": 13.8, "learning_rate": 3.999726747959149e-06, "loss": 2.1949, "step": 538700 }, { "epoch": 13.8, "learning_rate": 3.991187621682549e-06, "loss": 2.4244, "step": 538800 }, { "epoch": 13.81, "learning_rate": 3.98264849540595e-06, "loss": 2.3751, "step": 538900 }, { "epoch": 13.81, "learning_rate": 3.974109369129351e-06, "loss": 2.3418, "step": 539000 }, { "epoch": 13.81, "learning_rate": 3.965570242852751e-06, "loss": 2.3028, "step": 539100 }, { "epoch": 13.81, "learning_rate": 3.957031116576152e-06, "loss": 2.3154, "step": 539200 }, { "epoch": 13.82, "learning_rate": 3.948491990299553e-06, "loss": 2.2674, "step": 539300 }, { "epoch": 13.82, "learning_rate": 3.939952864022953e-06, "loss": 2.3125, "step": 539400 }, { "epoch": 13.82, "learning_rate": 3.931413737746354e-06, "loss": 2.4033, "step": 539500 }, { "epoch": 13.82, "learning_rate": 3.922874611469755e-06, "loss": 2.2926, "step": 539600 }, { "epoch": 13.83, "learning_rate": 3.9143354851931555e-06, "loss": 2.4927, "step": 539700 }, { "epoch": 13.83, "learning_rate": 3.905796358916556e-06, "loss": 2.3426, "step": 539800 }, { "epoch": 13.83, "learning_rate": 3.897257232639957e-06, "loss": 2.3957, "step": 539900 }, { "epoch": 13.83, "learning_rate": 3.888718106363358e-06, "loss": 2.3603, "step": 540000 }, { "epoch": 13.84, "learning_rate": 3.8801789800867575e-06, "loss": 2.2371, "step": 540100 }, { "epoch": 13.84, "learning_rate": 3.871639853810158e-06, "loss": 2.393, "step": 540200 }, { "epoch": 13.84, "learning_rate": 3.863100727533559e-06, "loss": 2.1292, "step": 540300 }, { "epoch": 13.84, "learning_rate": 3.8545616012569596e-06, "loss": 2.3997, "step": 540400 }, { "epoch": 13.85, "learning_rate": 3.84602247498036e-06, "loss": 2.4442, "step": 540500 }, { "epoch": 13.85, "learning_rate": 3.837483348703761e-06, "loss": 2.1941, "step": 540600 }, { "epoch": 13.85, "learning_rate": 3.828944222427162e-06, "loss": 2.3835, "step": 540700 }, { "epoch": 13.85, "learning_rate": 3.820405096150562e-06, "loss": 2.225, "step": 540800 }, { "epoch": 13.86, "learning_rate": 3.811865969873963e-06, "loss": 2.2939, "step": 540900 }, { "epoch": 13.86, "learning_rate": 3.8033268435973638e-06, "loss": 2.1806, "step": 541000 }, { "epoch": 13.86, "learning_rate": 3.794787717320764e-06, "loss": 2.4158, "step": 541100 }, { "epoch": 13.86, "learning_rate": 3.7862485910441648e-06, "loss": 2.3077, "step": 541200 }, { "epoch": 13.87, "learning_rate": 3.7777094647675646e-06, "loss": 2.2717, "step": 541300 }, { "epoch": 13.87, "learning_rate": 3.7691703384909653e-06, "loss": 2.4561, "step": 541400 }, { "epoch": 13.87, "learning_rate": 3.760631212214366e-06, "loss": 2.2191, "step": 541500 }, { "epoch": 13.87, "learning_rate": 3.7520920859377667e-06, "loss": 2.2821, "step": 541600 }, { "epoch": 13.88, "learning_rate": 3.7435529596611674e-06, "loss": 2.1932, "step": 541700 }, { "epoch": 13.88, "learning_rate": 3.735013833384568e-06, "loss": 2.1749, "step": 541800 }, { "epoch": 13.88, "learning_rate": 3.726474707107969e-06, "loss": 2.3083, "step": 541900 }, { "epoch": 13.88, "learning_rate": 3.717935580831369e-06, "loss": 2.2537, "step": 542000 }, { "epoch": 13.89, "learning_rate": 3.70939645455477e-06, "loss": 2.2607, "step": 542100 }, { "epoch": 13.89, "learning_rate": 3.7008573282781705e-06, "loss": 2.3312, "step": 542200 }, { "epoch": 13.89, "learning_rate": 3.692318202001571e-06, "loss": 2.3231, "step": 542300 }, { "epoch": 13.89, "learning_rate": 3.683779075724972e-06, "loss": 2.2841, "step": 542400 }, { "epoch": 13.9, "learning_rate": 3.6752399494483726e-06, "loss": 2.1106, "step": 542500 }, { "epoch": 13.9, "learning_rate": 3.6667008231717733e-06, "loss": 2.5235, "step": 542600 }, { "epoch": 13.9, "learning_rate": 3.6581616968951736e-06, "loss": 2.3049, "step": 542700 }, { "epoch": 13.91, "learning_rate": 3.6496225706185743e-06, "loss": 2.1696, "step": 542800 }, { "epoch": 13.91, "learning_rate": 3.641083444341975e-06, "loss": 2.3529, "step": 542900 }, { "epoch": 13.91, "learning_rate": 3.6325443180653757e-06, "loss": 2.5494, "step": 543000 }, { "epoch": 13.91, "learning_rate": 3.6240051917887764e-06, "loss": 2.4923, "step": 543100 }, { "epoch": 13.92, "learning_rate": 3.615466065512177e-06, "loss": 2.2472, "step": 543200 }, { "epoch": 13.92, "learning_rate": 3.6069269392355774e-06, "loss": 2.1455, "step": 543300 }, { "epoch": 13.92, "learning_rate": 3.598387812958978e-06, "loss": 2.2442, "step": 543400 }, { "epoch": 13.92, "learning_rate": 3.5898486866823788e-06, "loss": 2.2608, "step": 543500 }, { "epoch": 13.93, "learning_rate": 3.5813095604057795e-06, "loss": 2.4307, "step": 543600 }, { "epoch": 13.93, "learning_rate": 3.57277043412918e-06, "loss": 2.5147, "step": 543700 }, { "epoch": 13.93, "learning_rate": 3.564231307852581e-06, "loss": 2.2385, "step": 543800 }, { "epoch": 13.93, "learning_rate": 3.5556921815759816e-06, "loss": 2.1395, "step": 543900 }, { "epoch": 13.94, "learning_rate": 3.547153055299382e-06, "loss": 2.3087, "step": 544000 }, { "epoch": 13.94, "learning_rate": 3.5386139290227826e-06, "loss": 2.2105, "step": 544100 }, { "epoch": 13.94, "learning_rate": 3.5300748027461833e-06, "loss": 2.3762, "step": 544200 }, { "epoch": 13.94, "learning_rate": 3.521535676469584e-06, "loss": 2.297, "step": 544300 }, { "epoch": 13.95, "learning_rate": 3.5129965501929847e-06, "loss": 2.3126, "step": 544400 }, { "epoch": 13.95, "learning_rate": 3.5044574239163854e-06, "loss": 2.1932, "step": 544500 }, { "epoch": 13.95, "learning_rate": 3.4959182976397856e-06, "loss": 2.3053, "step": 544600 }, { "epoch": 13.95, "learning_rate": 3.4873791713631863e-06, "loss": 2.4063, "step": 544700 }, { "epoch": 13.96, "learning_rate": 3.478840045086587e-06, "loss": 2.2226, "step": 544800 }, { "epoch": 13.96, "learning_rate": 3.4703009188099877e-06, "loss": 2.3015, "step": 544900 }, { "epoch": 13.96, "learning_rate": 3.4617617925333884e-06, "loss": 2.0615, "step": 545000 }, { "epoch": 13.96, "learning_rate": 3.453222666256789e-06, "loss": 2.3217, "step": 545100 }, { "epoch": 13.97, "learning_rate": 3.44468353998019e-06, "loss": 2.2258, "step": 545200 }, { "epoch": 13.97, "learning_rate": 3.43614441370359e-06, "loss": 2.5063, "step": 545300 }, { "epoch": 13.97, "learning_rate": 3.427605287426991e-06, "loss": 2.1059, "step": 545400 }, { "epoch": 13.97, "learning_rate": 3.4190661611503915e-06, "loss": 2.294, "step": 545500 }, { "epoch": 13.98, "learning_rate": 3.4105270348737922e-06, "loss": 2.1908, "step": 545600 }, { "epoch": 13.98, "learning_rate": 3.401987908597192e-06, "loss": 2.4956, "step": 545700 }, { "epoch": 13.98, "learning_rate": 3.3934487823205928e-06, "loss": 2.4304, "step": 545800 }, { "epoch": 13.98, "learning_rate": 3.3849096560439935e-06, "loss": 2.228, "step": 545900 }, { "epoch": 13.99, "learning_rate": 3.376370529767394e-06, "loss": 2.1937, "step": 546000 }, { "epoch": 13.99, "learning_rate": 3.367831403490795e-06, "loss": 2.1773, "step": 546100 }, { "epoch": 13.99, "learning_rate": 3.359292277214195e-06, "loss": 2.2041, "step": 546200 }, { "epoch": 13.99, "learning_rate": 3.350753150937596e-06, "loss": 2.359, "step": 546300 }, { "epoch": 14.0, "learning_rate": 3.3422140246609966e-06, "loss": 2.4075, "step": 546400 }, { "epoch": 14.0, "learning_rate": 3.3336748983843973e-06, "loss": 2.3136, "step": 546500 }, { "epoch": 14.0, "learning_rate": 3.325135772107798e-06, "loss": 2.3028, "step": 546600 }, { "epoch": 14.01, "learning_rate": 3.3165966458311987e-06, "loss": 2.3709, "step": 546700 }, { "epoch": 14.01, "learning_rate": 3.3080575195545994e-06, "loss": 2.084, "step": 546800 }, { "epoch": 14.01, "learning_rate": 3.2995183932779996e-06, "loss": 2.4734, "step": 546900 }, { "epoch": 14.01, "learning_rate": 3.2909792670014003e-06, "loss": 2.1511, "step": 547000 }, { "epoch": 14.02, "learning_rate": 3.282440140724801e-06, "loss": 2.2915, "step": 547100 }, { "epoch": 14.02, "learning_rate": 3.2739010144482017e-06, "loss": 2.2365, "step": 547200 }, { "epoch": 14.02, "learning_rate": 3.2653618881716024e-06, "loss": 2.3072, "step": 547300 }, { "epoch": 14.02, "learning_rate": 3.256822761895003e-06, "loss": 2.2433, "step": 547400 }, { "epoch": 14.03, "learning_rate": 3.2482836356184034e-06, "loss": 2.3882, "step": 547500 }, { "epoch": 14.03, "learning_rate": 3.239744509341804e-06, "loss": 2.1129, "step": 547600 }, { "epoch": 14.03, "learning_rate": 3.231205383065205e-06, "loss": 2.3175, "step": 547700 }, { "epoch": 14.03, "learning_rate": 3.2226662567886055e-06, "loss": 2.402, "step": 547800 }, { "epoch": 14.04, "learning_rate": 3.2141271305120062e-06, "loss": 2.2457, "step": 547900 }, { "epoch": 14.04, "learning_rate": 3.205588004235407e-06, "loss": 2.285, "step": 548000 }, { "epoch": 14.04, "learning_rate": 3.1970488779588076e-06, "loss": 2.2699, "step": 548100 }, { "epoch": 14.04, "learning_rate": 3.188509751682208e-06, "loss": 2.1153, "step": 548200 }, { "epoch": 14.05, "learning_rate": 3.1799706254056086e-06, "loss": 2.2285, "step": 548300 }, { "epoch": 14.05, "learning_rate": 3.1714314991290093e-06, "loss": 2.2391, "step": 548400 }, { "epoch": 14.05, "learning_rate": 3.16289237285241e-06, "loss": 2.4201, "step": 548500 }, { "epoch": 14.05, "learning_rate": 3.1543532465758107e-06, "loss": 2.2739, "step": 548600 }, { "epoch": 14.06, "learning_rate": 3.1458141202992114e-06, "loss": 2.3189, "step": 548700 }, { "epoch": 14.06, "learning_rate": 3.137274994022612e-06, "loss": 2.39, "step": 548800 }, { "epoch": 14.06, "learning_rate": 3.1287358677460124e-06, "loss": 2.2188, "step": 548900 }, { "epoch": 14.06, "learning_rate": 3.1201967414694127e-06, "loss": 2.2681, "step": 549000 }, { "epoch": 14.07, "learning_rate": 3.1116576151928134e-06, "loss": 2.262, "step": 549100 }, { "epoch": 14.07, "learning_rate": 3.103118488916214e-06, "loss": 2.3291, "step": 549200 }, { "epoch": 14.07, "learning_rate": 3.0945793626396148e-06, "loss": 2.1329, "step": 549300 }, { "epoch": 14.07, "learning_rate": 3.0860402363630155e-06, "loss": 2.0695, "step": 549400 }, { "epoch": 14.08, "learning_rate": 3.077501110086416e-06, "loss": 2.5006, "step": 549500 }, { "epoch": 14.08, "learning_rate": 3.0689619838098165e-06, "loss": 2.2184, "step": 549600 }, { "epoch": 14.08, "learning_rate": 3.060422857533217e-06, "loss": 2.088, "step": 549700 }, { "epoch": 14.08, "learning_rate": 3.051883731256618e-06, "loss": 2.2616, "step": 549800 }, { "epoch": 14.09, "learning_rate": 3.0433446049800186e-06, "loss": 2.2769, "step": 549900 }, { "epoch": 14.09, "learning_rate": 3.0348054787034193e-06, "loss": 2.3596, "step": 550000 }, { "epoch": 14.09, "learning_rate": 3.02626635242682e-06, "loss": 2.2853, "step": 550100 }, { "epoch": 14.09, "learning_rate": 3.0177272261502207e-06, "loss": 2.2473, "step": 550200 }, { "epoch": 14.1, "learning_rate": 3.009188099873621e-06, "loss": 2.3455, "step": 550300 }, { "epoch": 14.1, "learning_rate": 3.0006489735970216e-06, "loss": 2.3338, "step": 550400 }, { "epoch": 14.1, "learning_rate": 2.9921098473204223e-06, "loss": 2.2896, "step": 550500 }, { "epoch": 14.1, "learning_rate": 2.983570721043823e-06, "loss": 2.2142, "step": 550600 }, { "epoch": 14.11, "learning_rate": 2.9750315947672237e-06, "loss": 2.3097, "step": 550700 }, { "epoch": 14.11, "learning_rate": 2.9664924684906244e-06, "loss": 2.2682, "step": 550800 }, { "epoch": 14.11, "learning_rate": 2.957953342214025e-06, "loss": 2.301, "step": 550900 }, { "epoch": 14.12, "learning_rate": 2.9494142159374254e-06, "loss": 2.5299, "step": 551000 }, { "epoch": 14.12, "learning_rate": 2.940875089660826e-06, "loss": 2.3803, "step": 551100 }, { "epoch": 14.12, "learning_rate": 2.9323359633842264e-06, "loss": 2.1536, "step": 551200 }, { "epoch": 14.12, "learning_rate": 2.923796837107627e-06, "loss": 2.3938, "step": 551300 }, { "epoch": 14.13, "learning_rate": 2.915257710831028e-06, "loss": 2.2973, "step": 551400 }, { "epoch": 14.13, "learning_rate": 2.9067185845544285e-06, "loss": 2.2337, "step": 551500 }, { "epoch": 14.13, "learning_rate": 2.898179458277829e-06, "loss": 2.2885, "step": 551600 }, { "epoch": 14.13, "learning_rate": 2.8896403320012295e-06, "loss": 2.3434, "step": 551700 }, { "epoch": 14.14, "learning_rate": 2.88110120572463e-06, "loss": 2.3131, "step": 551800 }, { "epoch": 14.14, "learning_rate": 2.872562079448031e-06, "loss": 2.3793, "step": 551900 }, { "epoch": 14.14, "learning_rate": 2.8640229531714316e-06, "loss": 2.0242, "step": 552000 }, { "epoch": 14.14, "learning_rate": 2.8554838268948323e-06, "loss": 2.3354, "step": 552100 }, { "epoch": 14.15, "learning_rate": 2.846944700618233e-06, "loss": 2.2313, "step": 552200 }, { "epoch": 14.15, "learning_rate": 2.8384055743416337e-06, "loss": 2.346, "step": 552300 }, { "epoch": 14.15, "learning_rate": 2.829866448065034e-06, "loss": 2.3449, "step": 552400 }, { "epoch": 14.15, "learning_rate": 2.8213273217884347e-06, "loss": 2.2808, "step": 552500 }, { "epoch": 14.16, "learning_rate": 2.8127881955118354e-06, "loss": 2.205, "step": 552600 }, { "epoch": 14.16, "learning_rate": 2.804249069235236e-06, "loss": 2.3683, "step": 552700 }, { "epoch": 14.16, "learning_rate": 2.7957099429586368e-06, "loss": 2.3848, "step": 552800 }, { "epoch": 14.16, "learning_rate": 2.7871708166820375e-06, "loss": 2.3793, "step": 552900 }, { "epoch": 14.17, "learning_rate": 2.778631690405438e-06, "loss": 2.4302, "step": 553000 }, { "epoch": 14.17, "learning_rate": 2.7700925641288385e-06, "loss": 2.5048, "step": 553100 }, { "epoch": 14.17, "learning_rate": 2.761553437852239e-06, "loss": 2.3093, "step": 553200 }, { "epoch": 14.17, "learning_rate": 2.75301431157564e-06, "loss": 2.1302, "step": 553300 }, { "epoch": 14.18, "learning_rate": 2.74447518529904e-06, "loss": 2.5015, "step": 553400 }, { "epoch": 14.18, "learning_rate": 2.735936059022441e-06, "loss": 2.4615, "step": 553500 }, { "epoch": 14.18, "learning_rate": 2.7273969327458415e-06, "loss": 2.5099, "step": 553600 }, { "epoch": 14.18, "learning_rate": 2.7188578064692422e-06, "loss": 2.2814, "step": 553700 }, { "epoch": 14.19, "learning_rate": 2.7103186801926425e-06, "loss": 2.1708, "step": 553800 }, { "epoch": 14.19, "learning_rate": 2.7017795539160432e-06, "loss": 2.3461, "step": 553900 }, { "epoch": 14.19, "learning_rate": 2.693240427639444e-06, "loss": 2.1178, "step": 554000 }, { "epoch": 14.19, "learning_rate": 2.6847013013628446e-06, "loss": 2.2476, "step": 554100 }, { "epoch": 14.2, "learning_rate": 2.6761621750862453e-06, "loss": 2.1886, "step": 554200 }, { "epoch": 14.2, "learning_rate": 2.667623048809646e-06, "loss": 2.235, "step": 554300 }, { "epoch": 14.2, "learning_rate": 2.6590839225330467e-06, "loss": 2.3423, "step": 554400 }, { "epoch": 14.2, "learning_rate": 2.650544796256447e-06, "loss": 2.3436, "step": 554500 }, { "epoch": 14.21, "learning_rate": 2.6420056699798477e-06, "loss": 2.2064, "step": 554600 }, { "epoch": 14.21, "learning_rate": 2.6334665437032484e-06, "loss": 2.4161, "step": 554700 }, { "epoch": 14.21, "learning_rate": 2.624927417426649e-06, "loss": 2.241, "step": 554800 }, { "epoch": 14.22, "learning_rate": 2.61638829115005e-06, "loss": 2.3325, "step": 554900 }, { "epoch": 14.22, "learning_rate": 2.6078491648734505e-06, "loss": 2.223, "step": 555000 }, { "epoch": 14.22, "learning_rate": 2.599310038596851e-06, "loss": 2.2619, "step": 555100 }, { "epoch": 14.22, "learning_rate": 2.5907709123202515e-06, "loss": 2.3514, "step": 555200 }, { "epoch": 14.23, "learning_rate": 2.582231786043652e-06, "loss": 2.2996, "step": 555300 }, { "epoch": 14.23, "learning_rate": 2.573692659767053e-06, "loss": 2.3588, "step": 555400 }, { "epoch": 14.23, "learning_rate": 2.5651535334904536e-06, "loss": 2.3086, "step": 555500 }, { "epoch": 14.23, "learning_rate": 2.556614407213854e-06, "loss": 2.1997, "step": 555600 }, { "epoch": 14.24, "learning_rate": 2.5480752809372546e-06, "loss": 2.1863, "step": 555700 }, { "epoch": 14.24, "learning_rate": 2.5395361546606553e-06, "loss": 2.171, "step": 555800 }, { "epoch": 14.24, "learning_rate": 2.5309970283840555e-06, "loss": 2.2636, "step": 555900 }, { "epoch": 14.24, "learning_rate": 2.5224579021074563e-06, "loss": 2.3338, "step": 556000 }, { "epoch": 14.25, "learning_rate": 2.513918775830857e-06, "loss": 2.3938, "step": 556100 }, { "epoch": 14.25, "learning_rate": 2.5053796495542577e-06, "loss": 2.4101, "step": 556200 }, { "epoch": 14.25, "learning_rate": 2.4968405232776584e-06, "loss": 2.2689, "step": 556300 }, { "epoch": 14.25, "learning_rate": 2.488301397001059e-06, "loss": 2.2431, "step": 556400 }, { "epoch": 14.26, "learning_rate": 2.4797622707244598e-06, "loss": 2.3691, "step": 556500 }, { "epoch": 14.26, "learning_rate": 2.47122314444786e-06, "loss": 2.3202, "step": 556600 }, { "epoch": 14.26, "learning_rate": 2.4626840181712607e-06, "loss": 2.094, "step": 556700 }, { "epoch": 14.26, "learning_rate": 2.4541448918946614e-06, "loss": 2.4063, "step": 556800 }, { "epoch": 14.27, "learning_rate": 2.445605765618062e-06, "loss": 2.2024, "step": 556900 }, { "epoch": 14.27, "learning_rate": 2.437066639341463e-06, "loss": 2.1287, "step": 557000 }, { "epoch": 14.27, "learning_rate": 2.4285275130648635e-06, "loss": 2.1329, "step": 557100 }, { "epoch": 14.27, "learning_rate": 2.4199883867882642e-06, "loss": 2.3669, "step": 557200 }, { "epoch": 14.28, "learning_rate": 2.4114492605116645e-06, "loss": 2.3657, "step": 557300 }, { "epoch": 14.28, "learning_rate": 2.4029101342350652e-06, "loss": 2.4, "step": 557400 }, { "epoch": 14.28, "learning_rate": 2.394371007958466e-06, "loss": 2.3162, "step": 557500 }, { "epoch": 14.28, "learning_rate": 2.3858318816818666e-06, "loss": 2.3129, "step": 557600 }, { "epoch": 14.29, "learning_rate": 2.3772927554052673e-06, "loss": 2.1451, "step": 557700 }, { "epoch": 14.29, "learning_rate": 2.3687536291286676e-06, "loss": 2.4304, "step": 557800 }, { "epoch": 14.29, "learning_rate": 2.3602145028520683e-06, "loss": 2.2704, "step": 557900 }, { "epoch": 14.29, "learning_rate": 2.3516753765754686e-06, "loss": 2.3049, "step": 558000 }, { "epoch": 14.3, "learning_rate": 2.3431362502988693e-06, "loss": 2.2524, "step": 558100 }, { "epoch": 14.3, "learning_rate": 2.33459712402227e-06, "loss": 2.3015, "step": 558200 }, { "epoch": 14.3, "learning_rate": 2.3260579977456707e-06, "loss": 2.278, "step": 558300 }, { "epoch": 14.3, "learning_rate": 2.3175188714690714e-06, "loss": 2.3302, "step": 558400 }, { "epoch": 14.31, "learning_rate": 2.308979745192472e-06, "loss": 2.2837, "step": 558500 }, { "epoch": 14.31, "learning_rate": 2.3004406189158728e-06, "loss": 2.3509, "step": 558600 }, { "epoch": 14.31, "learning_rate": 2.291901492639273e-06, "loss": 2.3553, "step": 558700 }, { "epoch": 14.31, "learning_rate": 2.2833623663626738e-06, "loss": 2.1597, "step": 558800 }, { "epoch": 14.32, "learning_rate": 2.2748232400860745e-06, "loss": 2.3153, "step": 558900 }, { "epoch": 14.32, "learning_rate": 2.266284113809475e-06, "loss": 2.2201, "step": 559000 }, { "epoch": 14.32, "learning_rate": 2.257744987532876e-06, "loss": 2.3287, "step": 559100 }, { "epoch": 14.33, "learning_rate": 2.2492058612562766e-06, "loss": 2.2204, "step": 559200 }, { "epoch": 14.33, "learning_rate": 2.2406667349796773e-06, "loss": 2.2555, "step": 559300 }, { "epoch": 14.33, "learning_rate": 2.2321276087030775e-06, "loss": 2.3604, "step": 559400 }, { "epoch": 14.33, "learning_rate": 2.2235884824264783e-06, "loss": 2.3152, "step": 559500 }, { "epoch": 14.34, "learning_rate": 2.215049356149879e-06, "loss": 2.2703, "step": 559600 }, { "epoch": 14.34, "learning_rate": 2.2065102298732797e-06, "loss": 2.4655, "step": 559700 }, { "epoch": 14.34, "learning_rate": 2.1979711035966804e-06, "loss": 2.1722, "step": 559800 }, { "epoch": 14.34, "learning_rate": 2.189431977320081e-06, "loss": 2.2994, "step": 559900 }, { "epoch": 14.35, "learning_rate": 2.1808928510434813e-06, "loss": 2.2292, "step": 560000 }, { "epoch": 14.35, "learning_rate": 2.1723537247668816e-06, "loss": 2.5873, "step": 560100 }, { "epoch": 14.35, "learning_rate": 2.1638145984902823e-06, "loss": 2.2973, "step": 560200 }, { "epoch": 14.35, "learning_rate": 2.155275472213683e-06, "loss": 2.4311, "step": 560300 }, { "epoch": 14.36, "learning_rate": 2.1467363459370837e-06, "loss": 2.1605, "step": 560400 }, { "epoch": 14.36, "learning_rate": 2.1381972196604844e-06, "loss": 2.2169, "step": 560500 }, { "epoch": 14.36, "learning_rate": 2.129658093383885e-06, "loss": 2.3904, "step": 560600 }, { "epoch": 14.36, "learning_rate": 2.121118967107286e-06, "loss": 2.2097, "step": 560700 }, { "epoch": 14.37, "learning_rate": 2.112579840830686e-06, "loss": 2.2065, "step": 560800 }, { "epoch": 14.37, "learning_rate": 2.104040714554087e-06, "loss": 2.2926, "step": 560900 }, { "epoch": 14.37, "learning_rate": 2.0955015882774875e-06, "loss": 2.3529, "step": 561000 }, { "epoch": 14.37, "learning_rate": 2.086962462000888e-06, "loss": 2.2113, "step": 561100 }, { "epoch": 14.38, "learning_rate": 2.078423335724289e-06, "loss": 2.1736, "step": 561200 }, { "epoch": 14.38, "learning_rate": 2.0698842094476896e-06, "loss": 2.1903, "step": 561300 }, { "epoch": 14.38, "learning_rate": 2.0613450831710903e-06, "loss": 2.3195, "step": 561400 }, { "epoch": 14.38, "learning_rate": 2.0528059568944906e-06, "loss": 2.3093, "step": 561500 }, { "epoch": 14.39, "learning_rate": 2.0442668306178913e-06, "loss": 2.3438, "step": 561600 }, { "epoch": 14.39, "learning_rate": 2.035727704341292e-06, "loss": 2.2253, "step": 561700 }, { "epoch": 14.39, "learning_rate": 2.0271885780646927e-06, "loss": 2.2596, "step": 561800 }, { "epoch": 14.39, "learning_rate": 2.0186494517880934e-06, "loss": 2.3994, "step": 561900 }, { "epoch": 14.4, "learning_rate": 2.010110325511494e-06, "loss": 2.2312, "step": 562000 }, { "epoch": 14.4, "learning_rate": 2.0015711992348944e-06, "loss": 2.4136, "step": 562100 }, { "epoch": 14.4, "learning_rate": 1.9930320729582946e-06, "loss": 2.305, "step": 562200 }, { "epoch": 14.4, "learning_rate": 1.9844929466816953e-06, "loss": 2.311, "step": 562300 }, { "epoch": 14.41, "learning_rate": 1.975953820405096e-06, "loss": 2.3172, "step": 562400 }, { "epoch": 14.41, "learning_rate": 1.9674146941284967e-06, "loss": 2.253, "step": 562500 }, { "epoch": 14.41, "learning_rate": 1.9588755678518974e-06, "loss": 2.1828, "step": 562600 }, { "epoch": 14.41, "learning_rate": 1.950336441575298e-06, "loss": 2.3561, "step": 562700 }, { "epoch": 14.42, "learning_rate": 1.941797315298699e-06, "loss": 2.3354, "step": 562800 }, { "epoch": 14.42, "learning_rate": 1.933258189022099e-06, "loss": 2.3418, "step": 562900 }, { "epoch": 14.42, "learning_rate": 1.9247190627455e-06, "loss": 2.1337, "step": 563000 }, { "epoch": 14.43, "learning_rate": 1.9161799364689005e-06, "loss": 2.2881, "step": 563100 }, { "epoch": 14.43, "learning_rate": 1.9076408101923012e-06, "loss": 2.1455, "step": 563200 }, { "epoch": 14.43, "learning_rate": 1.899101683915702e-06, "loss": 2.2943, "step": 563300 }, { "epoch": 14.43, "learning_rate": 1.8905625576391024e-06, "loss": 2.4468, "step": 563400 }, { "epoch": 14.44, "learning_rate": 1.8820234313625031e-06, "loss": 2.2302, "step": 563500 }, { "epoch": 14.44, "learning_rate": 1.8734843050859038e-06, "loss": 2.2683, "step": 563600 }, { "epoch": 14.44, "learning_rate": 1.8649451788093043e-06, "loss": 2.3384, "step": 563700 }, { "epoch": 14.44, "learning_rate": 1.856406052532705e-06, "loss": 2.4106, "step": 563800 }, { "epoch": 14.45, "learning_rate": 1.8478669262561057e-06, "loss": 2.3334, "step": 563900 }, { "epoch": 14.45, "learning_rate": 1.8393277999795064e-06, "loss": 2.3531, "step": 564000 }, { "epoch": 14.45, "learning_rate": 1.830788673702907e-06, "loss": 2.3331, "step": 564100 }, { "epoch": 14.45, "learning_rate": 1.8222495474263076e-06, "loss": 2.176, "step": 564200 }, { "epoch": 14.46, "learning_rate": 1.8137104211497079e-06, "loss": 2.3081, "step": 564300 }, { "epoch": 14.46, "learning_rate": 1.8051712948731086e-06, "loss": 2.3887, "step": 564400 }, { "epoch": 14.46, "learning_rate": 1.796632168596509e-06, "loss": 2.3987, "step": 564500 }, { "epoch": 14.46, "learning_rate": 1.7880930423199098e-06, "loss": 2.3948, "step": 564600 }, { "epoch": 14.47, "learning_rate": 1.7795539160433105e-06, "loss": 2.2204, "step": 564700 }, { "epoch": 14.47, "learning_rate": 1.771014789766711e-06, "loss": 2.2972, "step": 564800 }, { "epoch": 14.47, "learning_rate": 1.7624756634901117e-06, "loss": 2.3433, "step": 564900 }, { "epoch": 14.47, "learning_rate": 1.7539365372135124e-06, "loss": 2.2312, "step": 565000 }, { "epoch": 14.48, "learning_rate": 1.745397410936913e-06, "loss": 2.2213, "step": 565100 }, { "epoch": 14.48, "learning_rate": 1.7368582846603136e-06, "loss": 2.3164, "step": 565200 }, { "epoch": 14.48, "learning_rate": 1.7283191583837143e-06, "loss": 2.2808, "step": 565300 }, { "epoch": 14.48, "learning_rate": 1.719780032107115e-06, "loss": 2.0961, "step": 565400 }, { "epoch": 14.49, "learning_rate": 1.7112409058305155e-06, "loss": 2.3284, "step": 565500 }, { "epoch": 14.49, "learning_rate": 1.7027017795539162e-06, "loss": 2.2878, "step": 565600 }, { "epoch": 14.49, "learning_rate": 1.6941626532773169e-06, "loss": 2.22, "step": 565700 }, { "epoch": 14.49, "learning_rate": 1.6856235270007173e-06, "loss": 2.2838, "step": 565800 }, { "epoch": 14.5, "learning_rate": 1.677084400724118e-06, "loss": 2.1683, "step": 565900 }, { "epoch": 14.5, "learning_rate": 1.6685452744475187e-06, "loss": 2.1824, "step": 566000 }, { "epoch": 14.5, "learning_rate": 1.6600061481709194e-06, "loss": 2.2465, "step": 566100 }, { "epoch": 14.5, "learning_rate": 1.65146702189432e-06, "loss": 2.2038, "step": 566200 }, { "epoch": 14.51, "learning_rate": 1.6429278956177206e-06, "loss": 2.2572, "step": 566300 }, { "epoch": 14.51, "learning_rate": 1.6343887693411213e-06, "loss": 2.2173, "step": 566400 }, { "epoch": 14.51, "learning_rate": 1.6258496430645216e-06, "loss": 2.3154, "step": 566500 }, { "epoch": 14.51, "learning_rate": 1.617310516787922e-06, "loss": 2.3011, "step": 566600 }, { "epoch": 14.52, "learning_rate": 1.6087713905113228e-06, "loss": 2.2621, "step": 566700 }, { "epoch": 14.52, "learning_rate": 1.6002322642347235e-06, "loss": 2.3119, "step": 566800 }, { "epoch": 14.52, "learning_rate": 1.591693137958124e-06, "loss": 2.331, "step": 566900 }, { "epoch": 14.53, "learning_rate": 1.5831540116815247e-06, "loss": 2.4661, "step": 567000 }, { "epoch": 14.53, "learning_rate": 1.5746148854049254e-06, "loss": 2.3069, "step": 567100 }, { "epoch": 14.53, "learning_rate": 1.566075759128326e-06, "loss": 2.2827, "step": 567200 }, { "epoch": 14.53, "learning_rate": 1.5575366328517266e-06, "loss": 2.3809, "step": 567300 }, { "epoch": 14.54, "learning_rate": 1.5489975065751273e-06, "loss": 2.2665, "step": 567400 }, { "epoch": 14.54, "learning_rate": 1.540458380298528e-06, "loss": 2.2672, "step": 567500 }, { "epoch": 14.54, "learning_rate": 1.5319192540219285e-06, "loss": 2.3254, "step": 567600 }, { "epoch": 14.54, "learning_rate": 1.5233801277453292e-06, "loss": 2.1762, "step": 567700 }, { "epoch": 14.55, "learning_rate": 1.5148410014687299e-06, "loss": 2.1415, "step": 567800 }, { "epoch": 14.55, "learning_rate": 1.5063018751921304e-06, "loss": 2.2914, "step": 567900 }, { "epoch": 14.55, "learning_rate": 1.497762748915531e-06, "loss": 2.2524, "step": 568000 }, { "epoch": 14.55, "learning_rate": 1.4892236226389318e-06, "loss": 2.2628, "step": 568100 }, { "epoch": 14.56, "learning_rate": 1.4806844963623323e-06, "loss": 2.3373, "step": 568200 }, { "epoch": 14.56, "learning_rate": 1.4721453700857328e-06, "loss": 2.3604, "step": 568300 }, { "epoch": 14.56, "learning_rate": 1.4636062438091335e-06, "loss": 2.2078, "step": 568400 }, { "epoch": 14.56, "learning_rate": 1.4550671175325342e-06, "loss": 2.3737, "step": 568500 }, { "epoch": 14.57, "learning_rate": 1.4465279912559346e-06, "loss": 2.1506, "step": 568600 }, { "epoch": 14.57, "learning_rate": 1.4379888649793353e-06, "loss": 2.4117, "step": 568700 }, { "epoch": 14.57, "learning_rate": 1.429449738702736e-06, "loss": 2.2271, "step": 568800 }, { "epoch": 14.57, "learning_rate": 1.4209106124261367e-06, "loss": 2.3585, "step": 568900 }, { "epoch": 14.58, "learning_rate": 1.4123714861495372e-06, "loss": 2.4083, "step": 569000 }, { "epoch": 14.58, "learning_rate": 1.403832359872938e-06, "loss": 2.2435, "step": 569100 }, { "epoch": 14.58, "learning_rate": 1.3952932335963386e-06, "loss": 2.1267, "step": 569200 }, { "epoch": 14.58, "learning_rate": 1.3867541073197391e-06, "loss": 2.2659, "step": 569300 }, { "epoch": 14.59, "learning_rate": 1.3782149810431396e-06, "loss": 2.234, "step": 569400 }, { "epoch": 14.59, "learning_rate": 1.3696758547665403e-06, "loss": 2.3186, "step": 569500 }, { "epoch": 14.59, "learning_rate": 1.361136728489941e-06, "loss": 2.3054, "step": 569600 }, { "epoch": 14.59, "learning_rate": 1.3525976022133415e-06, "loss": 2.243, "step": 569700 }, { "epoch": 14.6, "learning_rate": 1.3440584759367422e-06, "loss": 2.1987, "step": 569800 }, { "epoch": 14.6, "learning_rate": 1.335519349660143e-06, "loss": 2.4739, "step": 569900 }, { "epoch": 14.6, "learning_rate": 1.3269802233835434e-06, "loss": 2.156, "step": 570000 }, { "epoch": 14.6, "learning_rate": 1.318441097106944e-06, "loss": 2.2558, "step": 570100 }, { "epoch": 14.61, "learning_rate": 1.3099019708303448e-06, "loss": 2.3416, "step": 570200 }, { "epoch": 14.61, "learning_rate": 1.3013628445537455e-06, "loss": 2.3596, "step": 570300 }, { "epoch": 14.61, "learning_rate": 1.2928237182771458e-06, "loss": 2.3077, "step": 570400 }, { "epoch": 14.61, "learning_rate": 1.2842845920005465e-06, "loss": 2.377, "step": 570500 }, { "epoch": 14.62, "learning_rate": 1.2757454657239472e-06, "loss": 2.2575, "step": 570600 }, { "epoch": 14.62, "learning_rate": 1.2672063394473479e-06, "loss": 2.3954, "step": 570700 }, { "epoch": 14.62, "learning_rate": 1.2586672131707484e-06, "loss": 2.1197, "step": 570800 }, { "epoch": 14.62, "learning_rate": 1.250128086894149e-06, "loss": 2.3559, "step": 570900 }, { "epoch": 14.63, "learning_rate": 1.2415889606175498e-06, "loss": 2.1791, "step": 571000 }, { "epoch": 14.63, "learning_rate": 1.2330498343409503e-06, "loss": 2.2896, "step": 571100 }, { "epoch": 14.63, "learning_rate": 1.224510708064351e-06, "loss": 2.3246, "step": 571200 }, { "epoch": 14.64, "learning_rate": 1.2159715817877517e-06, "loss": 2.3038, "step": 571300 }, { "epoch": 14.64, "learning_rate": 1.2074324555111522e-06, "loss": 2.4088, "step": 571400 }, { "epoch": 14.64, "learning_rate": 1.1988933292345527e-06, "loss": 2.2095, "step": 571500 }, { "epoch": 14.64, "learning_rate": 1.1903542029579534e-06, "loss": 2.407, "step": 571600 }, { "epoch": 14.65, "learning_rate": 1.181815076681354e-06, "loss": 2.1209, "step": 571700 }, { "epoch": 14.65, "learning_rate": 1.1732759504047545e-06, "loss": 2.1136, "step": 571800 }, { "epoch": 14.65, "learning_rate": 1.1647368241281552e-06, "loss": 2.3447, "step": 571900 }, { "epoch": 14.65, "learning_rate": 1.156197697851556e-06, "loss": 2.2854, "step": 572000 }, { "epoch": 14.66, "learning_rate": 1.1476585715749564e-06, "loss": 2.2554, "step": 572100 }, { "epoch": 14.66, "learning_rate": 1.1391194452983571e-06, "loss": 2.3975, "step": 572200 }, { "epoch": 14.66, "learning_rate": 1.1305803190217578e-06, "loss": 2.1929, "step": 572300 }, { "epoch": 14.66, "learning_rate": 1.1220411927451585e-06, "loss": 2.3007, "step": 572400 }, { "epoch": 14.67, "learning_rate": 1.113502066468559e-06, "loss": 2.433, "step": 572500 }, { "epoch": 14.67, "learning_rate": 1.1049629401919595e-06, "loss": 2.1499, "step": 572600 }, { "epoch": 14.67, "learning_rate": 1.0964238139153602e-06, "loss": 2.1606, "step": 572700 }, { "epoch": 14.67, "learning_rate": 1.087884687638761e-06, "loss": 2.2014, "step": 572800 }, { "epoch": 14.68, "learning_rate": 1.0793455613621614e-06, "loss": 2.229, "step": 572900 }, { "epoch": 14.68, "learning_rate": 1.0708064350855621e-06, "loss": 2.1355, "step": 573000 }, { "epoch": 14.68, "learning_rate": 1.0622673088089628e-06, "loss": 2.1702, "step": 573100 }, { "epoch": 14.68, "learning_rate": 1.0537281825323633e-06, "loss": 2.3781, "step": 573200 }, { "epoch": 14.69, "learning_rate": 1.045189056255764e-06, "loss": 2.2928, "step": 573300 }, { "epoch": 14.69, "learning_rate": 1.0366499299791647e-06, "loss": 2.2605, "step": 573400 }, { "epoch": 14.69, "learning_rate": 1.0281108037025652e-06, "loss": 2.3233, "step": 573500 }, { "epoch": 14.69, "learning_rate": 1.0195716774259657e-06, "loss": 2.2551, "step": 573600 }, { "epoch": 14.7, "learning_rate": 1.0110325511493664e-06, "loss": 2.4301, "step": 573700 }, { "epoch": 14.7, "learning_rate": 1.002493424872767e-06, "loss": 2.2655, "step": 573800 }, { "epoch": 14.7, "learning_rate": 9.939542985961676e-07, "loss": 2.3177, "step": 573900 }, { "epoch": 14.7, "learning_rate": 9.854151723195683e-07, "loss": 2.2581, "step": 574000 }, { "epoch": 14.71, "learning_rate": 9.76876046042969e-07, "loss": 2.2795, "step": 574100 }, { "epoch": 14.71, "learning_rate": 9.683369197663695e-07, "loss": 2.4648, "step": 574200 }, { "epoch": 14.71, "learning_rate": 9.597977934897702e-07, "loss": 2.2556, "step": 574300 }, { "epoch": 14.71, "learning_rate": 9.512586672131709e-07, "loss": 2.2184, "step": 574400 }, { "epoch": 14.72, "learning_rate": 9.427195409365715e-07, "loss": 2.2618, "step": 574500 }, { "epoch": 14.72, "learning_rate": 9.341804146599721e-07, "loss": 2.3441, "step": 574600 }, { "epoch": 14.72, "learning_rate": 9.256412883833725e-07, "loss": 2.1492, "step": 574700 }, { "epoch": 14.72, "learning_rate": 9.171021621067732e-07, "loss": 2.3674, "step": 574800 }, { "epoch": 14.73, "learning_rate": 9.085630358301738e-07, "loss": 2.1999, "step": 574900 }, { "epoch": 14.73, "learning_rate": 9.000239095535744e-07, "loss": 2.2784, "step": 575000 }, { "epoch": 14.73, "learning_rate": 8.914847832769751e-07, "loss": 2.2225, "step": 575100 }, { "epoch": 14.74, "learning_rate": 8.829456570003757e-07, "loss": 2.2886, "step": 575200 }, { "epoch": 14.74, "learning_rate": 8.744065307237764e-07, "loss": 2.3574, "step": 575300 }, { "epoch": 14.74, "learning_rate": 8.65867404447177e-07, "loss": 2.2308, "step": 575400 }, { "epoch": 14.74, "learning_rate": 8.573282781705776e-07, "loss": 2.2534, "step": 575500 }, { "epoch": 14.75, "learning_rate": 8.487891518939783e-07, "loss": 2.2353, "step": 575600 }, { "epoch": 14.75, "learning_rate": 8.402500256173789e-07, "loss": 2.2454, "step": 575700 }, { "epoch": 14.75, "learning_rate": 8.317108993407794e-07, "loss": 2.4992, "step": 575800 }, { "epoch": 14.75, "learning_rate": 8.2317177306418e-07, "loss": 2.4162, "step": 575900 }, { "epoch": 14.76, "learning_rate": 8.146326467875807e-07, "loss": 2.2603, "step": 576000 }, { "epoch": 14.76, "learning_rate": 8.060935205109813e-07, "loss": 2.4212, "step": 576100 }, { "epoch": 14.76, "learning_rate": 7.97554394234382e-07, "loss": 2.1244, "step": 576200 }, { "epoch": 14.76, "learning_rate": 7.890152679577826e-07, "loss": 2.3865, "step": 576300 }, { "epoch": 14.77, "learning_rate": 7.804761416811832e-07, "loss": 2.2334, "step": 576400 }, { "epoch": 14.77, "learning_rate": 7.719370154045839e-07, "loss": 2.3344, "step": 576500 }, { "epoch": 14.77, "learning_rate": 7.633978891279845e-07, "loss": 2.2259, "step": 576600 }, { "epoch": 14.77, "learning_rate": 7.548587628513851e-07, "loss": 2.4459, "step": 576700 }, { "epoch": 14.78, "learning_rate": 7.463196365747857e-07, "loss": 2.3647, "step": 576800 }, { "epoch": 14.78, "learning_rate": 7.377805102981863e-07, "loss": 2.4897, "step": 576900 }, { "epoch": 14.78, "learning_rate": 7.29241384021587e-07, "loss": 2.3553, "step": 577000 }, { "epoch": 14.78, "learning_rate": 7.207022577449876e-07, "loss": 2.2556, "step": 577100 }, { "epoch": 14.79, "learning_rate": 7.121631314683882e-07, "loss": 2.3734, "step": 577200 }, { "epoch": 14.79, "learning_rate": 7.036240051917888e-07, "loss": 2.414, "step": 577300 }, { "epoch": 14.79, "learning_rate": 6.950848789151895e-07, "loss": 2.2522, "step": 577400 }, { "epoch": 14.79, "learning_rate": 6.865457526385901e-07, "loss": 2.1318, "step": 577500 }, { "epoch": 14.8, "learning_rate": 6.780066263619907e-07, "loss": 2.1168, "step": 577600 }, { "epoch": 14.8, "learning_rate": 6.694675000853914e-07, "loss": 2.3635, "step": 577700 }, { "epoch": 14.8, "learning_rate": 6.609283738087918e-07, "loss": 2.3013, "step": 577800 }, { "epoch": 14.8, "learning_rate": 6.523892475321925e-07, "loss": 2.3134, "step": 577900 }, { "epoch": 14.81, "learning_rate": 6.438501212555931e-07, "loss": 2.3957, "step": 578000 }, { "epoch": 14.81, "learning_rate": 6.353109949789938e-07, "loss": 2.1935, "step": 578100 }, { "epoch": 14.81, "learning_rate": 6.267718687023944e-07, "loss": 2.2694, "step": 578200 }, { "epoch": 14.81, "learning_rate": 6.18232742425795e-07, "loss": 2.3684, "step": 578300 }, { "epoch": 14.82, "learning_rate": 6.096936161491956e-07, "loss": 2.2628, "step": 578400 }, { "epoch": 14.82, "learning_rate": 6.011544898725962e-07, "loss": 2.2747, "step": 578500 }, { "epoch": 14.82, "learning_rate": 5.926153635959969e-07, "loss": 2.2606, "step": 578600 }, { "epoch": 14.82, "learning_rate": 5.840762373193975e-07, "loss": 2.3139, "step": 578700 }, { "epoch": 14.83, "learning_rate": 5.755371110427981e-07, "loss": 2.329, "step": 578800 }, { "epoch": 14.83, "learning_rate": 5.669979847661987e-07, "loss": 2.2978, "step": 578900 }, { "epoch": 14.83, "learning_rate": 5.584588584895993e-07, "loss": 2.1899, "step": 579000 }, { "epoch": 14.84, "learning_rate": 5.49919732213e-07, "loss": 2.2918, "step": 579100 }, { "epoch": 14.84, "learning_rate": 5.413806059364006e-07, "loss": 2.1719, "step": 579200 }, { "epoch": 14.84, "learning_rate": 5.328414796598013e-07, "loss": 2.4, "step": 579300 }, { "epoch": 14.84, "learning_rate": 5.243023533832018e-07, "loss": 2.2283, "step": 579400 }, { "epoch": 14.85, "learning_rate": 5.157632271066025e-07, "loss": 2.4378, "step": 579500 }, { "epoch": 14.85, "learning_rate": 5.072241008300031e-07, "loss": 2.2668, "step": 579600 }, { "epoch": 14.85, "learning_rate": 4.986849745534037e-07, "loss": 2.1497, "step": 579700 }, { "epoch": 14.85, "learning_rate": 4.901458482768044e-07, "loss": 2.2747, "step": 579800 }, { "epoch": 14.86, "learning_rate": 4.816067220002049e-07, "loss": 2.2221, "step": 579900 }, { "epoch": 14.86, "learning_rate": 4.730675957236056e-07, "loss": 2.3611, "step": 580000 }, { "epoch": 14.86, "learning_rate": 4.645284694470062e-07, "loss": 2.2331, "step": 580100 }, { "epoch": 14.86, "learning_rate": 4.559893431704068e-07, "loss": 2.3034, "step": 580200 }, { "epoch": 14.87, "learning_rate": 4.4745021689380747e-07, "loss": 2.2968, "step": 580300 }, { "epoch": 14.87, "learning_rate": 4.389110906172081e-07, "loss": 2.3666, "step": 580400 }, { "epoch": 14.87, "learning_rate": 4.3037196434060866e-07, "loss": 2.3805, "step": 580500 }, { "epoch": 14.87, "learning_rate": 4.218328380640093e-07, "loss": 2.3279, "step": 580600 }, { "epoch": 14.88, "learning_rate": 4.132937117874099e-07, "loss": 2.1707, "step": 580700 }, { "epoch": 14.88, "learning_rate": 4.0475458551081055e-07, "loss": 2.0922, "step": 580800 }, { "epoch": 14.88, "learning_rate": 3.962154592342112e-07, "loss": 2.3051, "step": 580900 }, { "epoch": 14.88, "learning_rate": 3.876763329576118e-07, "loss": 2.1741, "step": 581000 }, { "epoch": 14.89, "learning_rate": 3.7913720668101245e-07, "loss": 2.3168, "step": 581100 }, { "epoch": 14.89, "learning_rate": 3.7059808040441304e-07, "loss": 2.2626, "step": 581200 }, { "epoch": 14.89, "learning_rate": 3.620589541278137e-07, "loss": 2.435, "step": 581300 }, { "epoch": 14.89, "learning_rate": 3.535198278512143e-07, "loss": 2.3655, "step": 581400 }, { "epoch": 14.9, "learning_rate": 3.449807015746149e-07, "loss": 2.2915, "step": 581500 }, { "epoch": 14.9, "learning_rate": 3.3644157529801553e-07, "loss": 2.2458, "step": 581600 }, { "epoch": 14.9, "learning_rate": 3.279024490214161e-07, "loss": 2.23, "step": 581700 }, { "epoch": 14.9, "learning_rate": 3.1936332274481677e-07, "loss": 2.3178, "step": 581800 }, { "epoch": 14.91, "learning_rate": 3.108241964682174e-07, "loss": 2.3426, "step": 581900 }, { "epoch": 14.91, "learning_rate": 3.02285070191618e-07, "loss": 2.1817, "step": 582000 }, { "epoch": 14.91, "learning_rate": 2.937459439150186e-07, "loss": 2.1318, "step": 582100 }, { "epoch": 14.91, "learning_rate": 2.8520681763841926e-07, "loss": 2.3357, "step": 582200 }, { "epoch": 14.92, "learning_rate": 2.7666769136181985e-07, "loss": 2.4255, "step": 582300 }, { "epoch": 14.92, "learning_rate": 2.681285650852205e-07, "loss": 2.3859, "step": 582400 }, { "epoch": 14.92, "learning_rate": 2.595894388086211e-07, "loss": 2.2031, "step": 582500 }, { "epoch": 14.92, "learning_rate": 2.5105031253202175e-07, "loss": 2.296, "step": 582600 }, { "epoch": 14.93, "learning_rate": 2.4251118625542234e-07, "loss": 2.1092, "step": 582700 }, { "epoch": 14.93, "learning_rate": 2.3397205997882296e-07, "loss": 2.2336, "step": 582800 }, { "epoch": 14.93, "learning_rate": 2.2543293370222359e-07, "loss": 2.3101, "step": 582900 }, { "epoch": 14.93, "learning_rate": 2.1689380742562423e-07, "loss": 2.2666, "step": 583000 }, { "epoch": 14.94, "learning_rate": 2.0835468114902483e-07, "loss": 2.31, "step": 583100 }, { "epoch": 14.94, "learning_rate": 1.9981555487242548e-07, "loss": 2.2143, "step": 583200 }, { "epoch": 14.94, "learning_rate": 1.9127642859582607e-07, "loss": 2.3068, "step": 583300 }, { "epoch": 14.95, "learning_rate": 1.827373023192267e-07, "loss": 2.2878, "step": 583400 }, { "epoch": 14.95, "learning_rate": 1.7419817604262734e-07, "loss": 2.131, "step": 583500 }, { "epoch": 14.95, "learning_rate": 1.6565904976602794e-07, "loss": 2.1412, "step": 583600 }, { "epoch": 14.95, "learning_rate": 1.5711992348942856e-07, "loss": 2.3785, "step": 583700 }, { "epoch": 14.96, "learning_rate": 1.485807972128292e-07, "loss": 2.3242, "step": 583800 }, { "epoch": 14.96, "learning_rate": 1.4004167093622983e-07, "loss": 2.2279, "step": 583900 }, { "epoch": 14.96, "learning_rate": 1.3150254465963042e-07, "loss": 2.3326, "step": 584000 }, { "epoch": 14.96, "learning_rate": 1.2296341838303105e-07, "loss": 2.2253, "step": 584100 }, { "epoch": 14.97, "learning_rate": 1.1442429210643168e-07, "loss": 2.3627, "step": 584200 }, { "epoch": 14.97, "learning_rate": 1.058851658298323e-07, "loss": 2.1505, "step": 584300 }, { "epoch": 14.97, "learning_rate": 9.734603955323291e-08, "loss": 2.4305, "step": 584400 }, { "epoch": 14.97, "learning_rate": 8.880691327663355e-08, "loss": 2.312, "step": 584500 }, { "epoch": 14.98, "learning_rate": 8.026778700003416e-08, "loss": 2.1861, "step": 584600 }, { "epoch": 14.98, "learning_rate": 7.172866072343478e-08, "loss": 2.3235, "step": 584700 }, { "epoch": 14.98, "learning_rate": 6.31895344468354e-08, "loss": 2.2686, "step": 584800 }, { "epoch": 14.98, "learning_rate": 5.465040817023603e-08, "loss": 2.2017, "step": 584900 }, { "epoch": 14.99, "learning_rate": 4.611128189363664e-08, "loss": 2.1246, "step": 585000 }, { "epoch": 14.99, "learning_rate": 3.7572155617037264e-08, "loss": 2.2377, "step": 585100 }, { "epoch": 14.99, "learning_rate": 2.9033029340437886e-08, "loss": 2.396, "step": 585200 }, { "epoch": 14.99, "learning_rate": 2.0493903063838508e-08, "loss": 2.3171, "step": 585300 }, { "epoch": 15.0, "learning_rate": 1.195477678723913e-08, "loss": 2.271, "step": 585400 }, { "epoch": 15.0, "learning_rate": 3.4156505106397517e-09, "loss": 2.2955, "step": 585500 }, { "epoch": 15.0, "step": 585540, "total_flos": 4.952930904057427e+16, "train_runtime": 597.6283, "train_samples_per_second": 979.773 } ], "max_steps": 585540, "num_train_epochs": 15, "total_flos": 4.952930904057427e+16, "trial_name": null, "trial_params": null }