{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.069933530379425, "eval_steps": 500, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999999901916556e-05, "loss": 1.7897, "step": 5 }, { "epoch": 0.0, "learning_rate": 4.9999996076662294e-05, "loss": 1.8861, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.999999117249044e-05, "loss": 1.4411, "step": 15 }, { "epoch": 0.0, "learning_rate": 4.999998430665038e-05, "loss": 1.4184, "step": 20 }, { "epoch": 0.0, "learning_rate": 4.9999975479142666e-05, "loss": 1.2592, "step": 25 }, { "epoch": 0.0, "learning_rate": 4.999996468996798e-05, "loss": 1.223, "step": 30 }, { "epoch": 0.0, "learning_rate": 4.999995193912717e-05, "loss": 1.2161, "step": 35 }, { "epoch": 0.0, "learning_rate": 4.999993722662123e-05, "loss": 1.2416, "step": 40 }, { "epoch": 0.0, "learning_rate": 4.9999920552451324e-05, "loss": 1.0517, "step": 45 }, { "epoch": 0.0, "learning_rate": 4.9999901916618755e-05, "loss": 1.2388, "step": 50 }, { "epoch": 0.0, "learning_rate": 4.999988131912499e-05, "loss": 1.2804, "step": 55 }, { "epoch": 0.0, "learning_rate": 4.999985875997164e-05, "loss": 1.0287, "step": 60 }, { "epoch": 0.0, "learning_rate": 4.999983423916048e-05, "loss": 1.1188, "step": 65 }, { "epoch": 0.0, "learning_rate": 4.999980775669344e-05, "loss": 1.1104, "step": 70 }, { "epoch": 0.0, "learning_rate": 4.9999779312572584e-05, "loss": 1.157, "step": 75 }, { "epoch": 0.0, "learning_rate": 4.999974890680015e-05, "loss": 1.1552, "step": 80 }, { "epoch": 0.0, "learning_rate": 4.999971653937852e-05, "loss": 1.1373, "step": 85 }, { "epoch": 0.0, "learning_rate": 4.9999682210310237e-05, "loss": 1.2307, "step": 90 }, { "epoch": 0.01, "learning_rate": 4.9999645919598e-05, "loss": 0.9955, "step": 95 }, { "epoch": 0.01, "learning_rate": 4.999960766724465e-05, "loss": 0.9544, "step": 100 }, { "epoch": 0.01, "learning_rate": 4.999956745325318e-05, "loss": 1.0432, "step": 105 }, { "epoch": 0.01, "learning_rate": 4.999952527762677e-05, "loss": 1.1078, "step": 110 }, { "epoch": 0.01, "learning_rate": 4.999948114036871e-05, "loss": 1.0228, "step": 115 }, { "epoch": 0.01, "learning_rate": 4.9999435041482466e-05, "loss": 1.0644, "step": 120 }, { "epoch": 0.01, "learning_rate": 4.999938698097166e-05, "loss": 1.0653, "step": 125 }, { "epoch": 0.01, "learning_rate": 4.999933695884007e-05, "loss": 1.0642, "step": 130 }, { "epoch": 0.01, "learning_rate": 4.99992849750916e-05, "loss": 0.9661, "step": 135 }, { "epoch": 0.01, "learning_rate": 4.999923102973034e-05, "loss": 1.0496, "step": 140 }, { "epoch": 0.01, "learning_rate": 4.999917512276053e-05, "loss": 0.9529, "step": 145 }, { "epoch": 0.01, "learning_rate": 4.999911725418655e-05, "loss": 0.999, "step": 150 }, { "epoch": 0.01, "learning_rate": 4.999905742401294e-05, "loss": 1.023, "step": 155 }, { "epoch": 0.01, "learning_rate": 4.999899563224439e-05, "loss": 1.0026, "step": 160 }, { "epoch": 0.01, "learning_rate": 4.999893187888577e-05, "loss": 1.0805, "step": 165 }, { "epoch": 0.01, "learning_rate": 4.9998866163942055e-05, "loss": 1.1233, "step": 170 }, { "epoch": 0.01, "learning_rate": 4.999879848741842e-05, "loss": 1.1635, "step": 175 }, { "epoch": 0.01, "learning_rate": 4.999872884932016e-05, "loss": 0.9927, "step": 180 }, { "epoch": 0.01, "learning_rate": 4.999865724965276e-05, "loss": 1.096, "step": 185 }, { "epoch": 0.01, "learning_rate": 4.999858368842182e-05, "loss": 1.0914, "step": 190 }, { "epoch": 0.01, "learning_rate": 4.999850816563312e-05, "loss": 0.9758, "step": 195 }, { "epoch": 0.01, "learning_rate": 4.999843068129258e-05, "loss": 0.9528, "step": 200 }, { "epoch": 0.01, "learning_rate": 4.9998351235406284e-05, "loss": 1.033, "step": 205 }, { "epoch": 0.01, "learning_rate": 4.999826982798047e-05, "loss": 1.2451, "step": 210 }, { "epoch": 0.01, "learning_rate": 4.999818645902152e-05, "loss": 0.9208, "step": 215 }, { "epoch": 0.01, "learning_rate": 4.9998101128535984e-05, "loss": 0.9928, "step": 220 }, { "epoch": 0.01, "learning_rate": 4.999801383653055e-05, "loss": 0.8492, "step": 225 }, { "epoch": 0.01, "learning_rate": 4.999792458301207e-05, "loss": 0.8356, "step": 230 }, { "epoch": 0.01, "learning_rate": 4.999783336798754e-05, "loss": 0.8712, "step": 235 }, { "epoch": 0.01, "learning_rate": 4.999774019146413e-05, "loss": 1.1794, "step": 240 }, { "epoch": 0.01, "learning_rate": 4.999764505344914e-05, "loss": 0.8604, "step": 245 }, { "epoch": 0.01, "learning_rate": 4.999754795395004e-05, "loss": 1.0337, "step": 250 }, { "epoch": 0.01, "learning_rate": 4.999744889297445e-05, "loss": 0.9918, "step": 255 }, { "epoch": 0.01, "learning_rate": 4.999734787053014e-05, "loss": 0.9943, "step": 260 }, { "epoch": 0.01, "learning_rate": 4.999724488662505e-05, "loss": 1.0025, "step": 265 }, { "epoch": 0.01, "learning_rate": 4.999713994126724e-05, "loss": 1.0377, "step": 270 }, { "epoch": 0.01, "learning_rate": 4.999703303446496e-05, "loss": 0.7867, "step": 275 }, { "epoch": 0.01, "learning_rate": 4.999692416622659e-05, "loss": 1.0016, "step": 280 }, { "epoch": 0.02, "learning_rate": 4.999681333656068e-05, "loss": 0.8957, "step": 285 }, { "epoch": 0.02, "learning_rate": 4.999670054547592e-05, "loss": 0.9476, "step": 290 }, { "epoch": 0.02, "learning_rate": 4.999658579298116e-05, "loss": 0.8151, "step": 295 }, { "epoch": 0.02, "learning_rate": 4.99964690790854e-05, "loss": 1.0061, "step": 300 }, { "epoch": 0.02, "learning_rate": 4.999635040379782e-05, "loss": 0.9179, "step": 305 }, { "epoch": 0.02, "learning_rate": 4.999622976712771e-05, "loss": 1.0512, "step": 310 }, { "epoch": 0.02, "learning_rate": 4.999610716908455e-05, "loss": 0.9141, "step": 315 }, { "epoch": 0.02, "learning_rate": 4.999598260967795e-05, "loss": 0.8681, "step": 320 }, { "epoch": 0.02, "learning_rate": 4.999585608891768e-05, "loss": 0.8928, "step": 325 }, { "epoch": 0.02, "learning_rate": 4.999572760681368e-05, "loss": 0.9659, "step": 330 }, { "epoch": 0.02, "learning_rate": 4.9995597163376025e-05, "loss": 1.1282, "step": 335 }, { "epoch": 0.02, "learning_rate": 4.999546475861495e-05, "loss": 1.0277, "step": 340 }, { "epoch": 0.02, "learning_rate": 4.9995330392540846e-05, "loss": 0.8917, "step": 345 }, { "epoch": 0.02, "learning_rate": 4.999519406516426e-05, "loss": 0.9693, "step": 350 }, { "epoch": 0.02, "learning_rate": 4.999505577649588e-05, "loss": 0.9954, "step": 355 }, { "epoch": 0.02, "learning_rate": 4.9994915526546565e-05, "loss": 0.7738, "step": 360 }, { "epoch": 0.02, "learning_rate": 4.999477331532732e-05, "loss": 0.9392, "step": 365 }, { "epoch": 0.02, "learning_rate": 4.999462914284929e-05, "loss": 0.9293, "step": 370 }, { "epoch": 0.02, "learning_rate": 4.9994483009123816e-05, "loss": 1.0134, "step": 375 }, { "epoch": 0.02, "learning_rate": 4.999433491416233e-05, "loss": 0.9113, "step": 380 }, { "epoch": 0.02, "learning_rate": 4.9994184857976484e-05, "loss": 0.97, "step": 385 }, { "epoch": 0.02, "learning_rate": 4.999403284057803e-05, "loss": 0.8572, "step": 390 }, { "epoch": 0.02, "learning_rate": 4.9993878861978914e-05, "loss": 0.7934, "step": 395 }, { "epoch": 0.02, "learning_rate": 4.99937229221912e-05, "loss": 1.1504, "step": 400 }, { "epoch": 0.02, "learning_rate": 4.999356502122714e-05, "loss": 0.971, "step": 405 }, { "epoch": 0.02, "learning_rate": 4.9993405159099115e-05, "loss": 0.7483, "step": 410 }, { "epoch": 0.02, "learning_rate": 4.999324333581967e-05, "loss": 0.8933, "step": 415 }, { "epoch": 0.02, "learning_rate": 4.99930795514015e-05, "loss": 0.8414, "step": 420 }, { "epoch": 0.02, "learning_rate": 4.9992913805857465e-05, "loss": 0.991, "step": 425 }, { "epoch": 0.02, "learning_rate": 4.999274609920056e-05, "loss": 1.0806, "step": 430 }, { "epoch": 0.02, "learning_rate": 4.999257643144396e-05, "loss": 1.0021, "step": 435 }, { "epoch": 0.02, "learning_rate": 4.9992404802600963e-05, "loss": 0.9346, "step": 440 }, { "epoch": 0.02, "learning_rate": 4.999223121268504e-05, "loss": 0.8399, "step": 445 }, { "epoch": 0.02, "learning_rate": 4.999205566170981e-05, "loss": 1.0498, "step": 450 }, { "epoch": 0.02, "learning_rate": 4.999187814968906e-05, "loss": 0.8322, "step": 455 }, { "epoch": 0.02, "learning_rate": 4.999169867663671e-05, "loss": 0.768, "step": 460 }, { "epoch": 0.02, "learning_rate": 4.999151724256684e-05, "loss": 0.9191, "step": 465 }, { "epoch": 0.03, "learning_rate": 4.999133384749369e-05, "loss": 0.8601, "step": 470 }, { "epoch": 0.03, "learning_rate": 4.999114849143165e-05, "loss": 0.9834, "step": 475 }, { "epoch": 0.03, "learning_rate": 4.999096117439527e-05, "loss": 1.0768, "step": 480 }, { "epoch": 0.03, "learning_rate": 4.999077189639924e-05, "loss": 0.8655, "step": 485 }, { "epoch": 0.03, "learning_rate": 4.999058065745841e-05, "loss": 1.01, "step": 490 }, { "epoch": 0.03, "learning_rate": 4.999038745758779e-05, "loss": 1.0327, "step": 495 }, { "epoch": 0.03, "learning_rate": 4.999019229680254e-05, "loss": 0.8839, "step": 500 }, { "epoch": 0.03, "learning_rate": 4.998999517511798e-05, "loss": 1.0411, "step": 505 }, { "epoch": 0.03, "learning_rate": 4.998979609254957e-05, "loss": 0.8918, "step": 510 }, { "epoch": 0.03, "learning_rate": 4.998959504911293e-05, "loss": 0.8533, "step": 515 }, { "epoch": 0.03, "learning_rate": 4.9989392044823836e-05, "loss": 0.9949, "step": 520 }, { "epoch": 0.03, "learning_rate": 4.998918707969822e-05, "loss": 0.9551, "step": 525 }, { "epoch": 0.03, "learning_rate": 4.9988980153752164e-05, "loss": 1.0028, "step": 530 }, { "epoch": 0.03, "learning_rate": 4.998877126700191e-05, "loss": 0.9293, "step": 535 }, { "epoch": 0.03, "learning_rate": 4.9988560419463836e-05, "loss": 0.9163, "step": 540 }, { "epoch": 0.03, "learning_rate": 4.9988347611154504e-05, "loss": 0.9049, "step": 545 }, { "epoch": 0.03, "learning_rate": 4.9988132842090596e-05, "loss": 0.9456, "step": 550 }, { "epoch": 0.03, "learning_rate": 4.998791611228897e-05, "loss": 0.9466, "step": 555 }, { "epoch": 0.03, "learning_rate": 4.998769742176663e-05, "loss": 0.9389, "step": 560 }, { "epoch": 0.03, "learning_rate": 4.998747677054074e-05, "loss": 0.8853, "step": 565 }, { "epoch": 0.03, "learning_rate": 4.998725415862861e-05, "loss": 0.797, "step": 570 }, { "epoch": 0.03, "learning_rate": 4.998702958604772e-05, "loss": 1.0129, "step": 575 }, { "epoch": 0.03, "learning_rate": 4.998680305281568e-05, "loss": 0.87, "step": 580 }, { "epoch": 0.03, "learning_rate": 4.998657455895026e-05, "loss": 0.9552, "step": 585 }, { "epoch": 0.03, "learning_rate": 4.99863441044694e-05, "loss": 0.7886, "step": 590 }, { "epoch": 0.03, "learning_rate": 4.9986111689391174e-05, "loss": 0.9748, "step": 595 }, { "epoch": 0.03, "learning_rate": 4.998587731373383e-05, "loss": 0.9811, "step": 600 }, { "epoch": 0.03, "learning_rate": 4.9985640977515744e-05, "loss": 0.8928, "step": 605 }, { "epoch": 0.03, "learning_rate": 4.998540268075548e-05, "loss": 0.9, "step": 610 }, { "epoch": 0.03, "learning_rate": 4.998516242347172e-05, "loss": 0.8575, "step": 615 }, { "epoch": 0.03, "learning_rate": 4.998492020568332e-05, "loss": 0.8665, "step": 620 }, { "epoch": 0.03, "learning_rate": 4.998467602740929e-05, "loss": 0.9402, "step": 625 }, { "epoch": 0.03, "learning_rate": 4.998442988866879e-05, "loss": 0.9656, "step": 630 }, { "epoch": 0.03, "learning_rate": 4.9984181789481134e-05, "loss": 0.9597, "step": 635 }, { "epoch": 0.03, "learning_rate": 4.998393172986577e-05, "loss": 0.8586, "step": 640 }, { "epoch": 0.03, "learning_rate": 4.998367970984236e-05, "loss": 0.8484, "step": 645 }, { "epoch": 0.03, "learning_rate": 4.998342572943064e-05, "loss": 0.955, "step": 650 }, { "epoch": 0.04, "learning_rate": 4.998316978865055e-05, "loss": 0.9496, "step": 655 }, { "epoch": 0.04, "learning_rate": 4.998291188752219e-05, "loss": 0.8029, "step": 660 }, { "epoch": 0.04, "learning_rate": 4.998265202606578e-05, "loss": 1.0053, "step": 665 }, { "epoch": 0.04, "learning_rate": 4.9982390204301715e-05, "loss": 1.0095, "step": 670 }, { "epoch": 0.04, "learning_rate": 4.9982126422250534e-05, "loss": 0.9966, "step": 675 }, { "epoch": 0.04, "learning_rate": 4.9981860679932944e-05, "loss": 0.8871, "step": 680 }, { "epoch": 0.04, "learning_rate": 4.99815929773698e-05, "loss": 0.7858, "step": 685 }, { "epoch": 0.04, "learning_rate": 4.9981323314582085e-05, "loss": 0.9204, "step": 690 }, { "epoch": 0.04, "learning_rate": 4.9981051691590984e-05, "loss": 1.0274, "step": 695 }, { "epoch": 0.04, "learning_rate": 4.99807781084178e-05, "loss": 1.0154, "step": 700 }, { "epoch": 0.04, "learning_rate": 4.9980502565083996e-05, "loss": 0.7178, "step": 705 }, { "epoch": 0.04, "learning_rate": 4.99802250616112e-05, "loss": 0.9133, "step": 710 }, { "epoch": 0.04, "learning_rate": 4.997994559802118e-05, "loss": 1.034, "step": 715 }, { "epoch": 0.04, "learning_rate": 4.9979664174335874e-05, "loss": 1.0744, "step": 720 }, { "epoch": 0.04, "learning_rate": 4.9979380790577364e-05, "loss": 0.9425, "step": 725 }, { "epoch": 0.04, "learning_rate": 4.997909544676788e-05, "loss": 0.9228, "step": 730 }, { "epoch": 0.04, "learning_rate": 4.997880814292981e-05, "loss": 0.7898, "step": 735 }, { "epoch": 0.04, "learning_rate": 4.99785188790857e-05, "loss": 0.9179, "step": 740 }, { "epoch": 0.04, "learning_rate": 4.9978227655258246e-05, "loss": 0.9558, "step": 745 }, { "epoch": 0.04, "learning_rate": 4.997793447147031e-05, "loss": 0.8552, "step": 750 }, { "epoch": 0.04, "learning_rate": 4.997763932774489e-05, "loss": 0.8245, "step": 755 }, { "epoch": 0.04, "learning_rate": 4.997734222410514e-05, "loss": 0.9163, "step": 760 }, { "epoch": 0.04, "learning_rate": 4.997704316057438e-05, "loss": 1.0522, "step": 765 }, { "epoch": 0.04, "learning_rate": 4.997674213717607e-05, "loss": 0.7773, "step": 770 }, { "epoch": 0.04, "learning_rate": 4.9976439153933843e-05, "loss": 0.8591, "step": 775 }, { "epoch": 0.04, "learning_rate": 4.997613421087146e-05, "loss": 0.9712, "step": 780 }, { "epoch": 0.04, "learning_rate": 4.997582730801285e-05, "loss": 0.7712, "step": 785 }, { "epoch": 0.04, "learning_rate": 4.99755184453821e-05, "loss": 0.797, "step": 790 }, { "epoch": 0.04, "learning_rate": 4.997520762300344e-05, "loss": 1.0037, "step": 795 }, { "epoch": 0.04, "learning_rate": 4.997489484090127e-05, "loss": 1.0809, "step": 800 }, { "epoch": 0.04, "learning_rate": 4.9974580099100124e-05, "loss": 0.9685, "step": 805 }, { "epoch": 0.04, "learning_rate": 4.9974263397624695e-05, "loss": 0.8071, "step": 810 }, { "epoch": 0.04, "learning_rate": 4.9973944736499837e-05, "loss": 0.9863, "step": 815 }, { "epoch": 0.04, "learning_rate": 4.997362411575056e-05, "loss": 0.8905, "step": 820 }, { "epoch": 0.04, "learning_rate": 4.9973301535402025e-05, "loss": 0.8756, "step": 825 }, { "epoch": 0.04, "learning_rate": 4.997297699547953e-05, "loss": 0.9958, "step": 830 }, { "epoch": 0.04, "learning_rate": 4.997265049600854e-05, "loss": 1.1043, "step": 835 }, { "epoch": 0.04, "learning_rate": 4.99723220370147e-05, "loss": 0.8283, "step": 840 }, { "epoch": 0.05, "learning_rate": 4.997199161852375e-05, "loss": 0.958, "step": 845 }, { "epoch": 0.05, "learning_rate": 4.997165924056164e-05, "loss": 1.1133, "step": 850 }, { "epoch": 0.05, "learning_rate": 4.997132490315444e-05, "loss": 0.9223, "step": 855 }, { "epoch": 0.05, "learning_rate": 4.9970988606328397e-05, "loss": 0.8501, "step": 860 }, { "epoch": 0.05, "learning_rate": 4.997065035010987e-05, "loss": 0.8628, "step": 865 }, { "epoch": 0.05, "learning_rate": 4.997031013452543e-05, "loss": 0.8997, "step": 870 }, { "epoch": 0.05, "learning_rate": 4.996996795960176e-05, "loss": 0.7625, "step": 875 }, { "epoch": 0.05, "learning_rate": 4.996962382536572e-05, "loss": 0.8228, "step": 880 }, { "epoch": 0.05, "learning_rate": 4.99692777318443e-05, "loss": 0.7735, "step": 885 }, { "epoch": 0.05, "learning_rate": 4.9968929679064655e-05, "loss": 0.8135, "step": 890 }, { "epoch": 0.05, "learning_rate": 4.9968579667054117e-05, "loss": 0.8264, "step": 895 }, { "epoch": 0.05, "learning_rate": 4.996822769584013e-05, "loss": 0.8968, "step": 900 }, { "epoch": 0.05, "learning_rate": 4.996787376545031e-05, "loss": 0.8806, "step": 905 }, { "epoch": 0.05, "learning_rate": 4.9967517875912446e-05, "loss": 0.799, "step": 910 }, { "epoch": 0.05, "learning_rate": 4.9967160027254446e-05, "loss": 1.0222, "step": 915 }, { "epoch": 0.05, "learning_rate": 4.9966800219504405e-05, "loss": 0.906, "step": 920 }, { "epoch": 0.05, "learning_rate": 4.9966438452690545e-05, "loss": 0.8508, "step": 925 }, { "epoch": 0.05, "learning_rate": 4.9966074726841254e-05, "loss": 0.9311, "step": 930 }, { "epoch": 0.05, "learning_rate": 4.996570904198508e-05, "loss": 0.8446, "step": 935 }, { "epoch": 0.05, "learning_rate": 4.99653413981507e-05, "loss": 0.8019, "step": 940 }, { "epoch": 0.05, "learning_rate": 4.996497179536699e-05, "loss": 0.8191, "step": 945 }, { "epoch": 0.05, "learning_rate": 4.9964600233662915e-05, "loss": 1.0145, "step": 950 }, { "epoch": 0.05, "learning_rate": 4.996422671306766e-05, "loss": 0.8515, "step": 955 }, { "epoch": 0.05, "learning_rate": 4.996385123361053e-05, "loss": 0.8751, "step": 960 }, { "epoch": 0.05, "learning_rate": 4.996347379532097e-05, "loss": 0.8246, "step": 965 }, { "epoch": 0.05, "learning_rate": 4.996309439822862e-05, "loss": 0.8811, "step": 970 }, { "epoch": 0.05, "learning_rate": 4.996271304236323e-05, "loss": 0.8678, "step": 975 }, { "epoch": 0.05, "learning_rate": 4.996232972775474e-05, "loss": 0.7357, "step": 980 }, { "epoch": 0.05, "learning_rate": 4.9961944454433204e-05, "loss": 0.8088, "step": 985 }, { "epoch": 0.05, "learning_rate": 4.996155722242888e-05, "loss": 0.8257, "step": 990 }, { "epoch": 0.05, "learning_rate": 4.996116803177214e-05, "loss": 0.8482, "step": 995 }, { "epoch": 0.05, "learning_rate": 4.996077688249352e-05, "loss": 0.928, "step": 1000 }, { "epoch": 0.05, "learning_rate": 4.996038377462372e-05, "loss": 0.8771, "step": 1005 }, { "epoch": 0.05, "learning_rate": 4.9959988708193585e-05, "loss": 1.0969, "step": 1010 }, { "epoch": 0.05, "learning_rate": 4.9959591683234106e-05, "loss": 0.811, "step": 1015 }, { "epoch": 0.05, "learning_rate": 4.9959192699776445e-05, "loss": 0.8073, "step": 1020 }, { "epoch": 0.05, "learning_rate": 4.99587917578519e-05, "loss": 0.8761, "step": 1025 }, { "epoch": 0.06, "learning_rate": 4.995838885749194e-05, "loss": 0.8305, "step": 1030 }, { "epoch": 0.06, "learning_rate": 4.995798399872818e-05, "loss": 0.9013, "step": 1035 }, { "epoch": 0.06, "learning_rate": 4.995757718159238e-05, "loss": 0.8605, "step": 1040 }, { "epoch": 0.06, "learning_rate": 4.995716840611647e-05, "loss": 0.7716, "step": 1045 }, { "epoch": 0.06, "learning_rate": 4.9956757672332515e-05, "loss": 0.9188, "step": 1050 }, { "epoch": 0.06, "learning_rate": 4.995634498027275e-05, "loss": 0.9129, "step": 1055 }, { "epoch": 0.06, "learning_rate": 4.995593032996957e-05, "loss": 0.8646, "step": 1060 }, { "epoch": 0.06, "learning_rate": 4.995551372145549e-05, "loss": 0.847, "step": 1065 }, { "epoch": 0.06, "learning_rate": 4.995509515476321e-05, "loss": 0.9522, "step": 1070 }, { "epoch": 0.06, "learning_rate": 4.9954674629925576e-05, "loss": 0.9094, "step": 1075 }, { "epoch": 0.06, "learning_rate": 4.995425214697558e-05, "loss": 0.8598, "step": 1080 }, { "epoch": 0.06, "learning_rate": 4.995382770594637e-05, "loss": 0.7556, "step": 1085 }, { "epoch": 0.06, "learning_rate": 4.995340130687126e-05, "loss": 0.891, "step": 1090 }, { "epoch": 0.06, "learning_rate": 4.9952972949783706e-05, "loss": 0.7336, "step": 1095 }, { "epoch": 0.06, "learning_rate": 4.995254263471732e-05, "loss": 0.8502, "step": 1100 }, { "epoch": 0.06, "learning_rate": 4.995211036170585e-05, "loss": 0.8469, "step": 1105 }, { "epoch": 0.06, "learning_rate": 4.995167613078324e-05, "loss": 0.8072, "step": 1110 }, { "epoch": 0.06, "learning_rate": 4.995123994198355e-05, "loss": 0.7748, "step": 1115 }, { "epoch": 0.06, "learning_rate": 4.9950801795341006e-05, "loss": 0.9986, "step": 1120 }, { "epoch": 0.06, "learning_rate": 4.995036169089e-05, "loss": 0.9872, "step": 1125 }, { "epoch": 0.06, "learning_rate": 4.994991962866505e-05, "loss": 0.9266, "step": 1130 }, { "epoch": 0.06, "learning_rate": 4.9949475608700846e-05, "loss": 0.9211, "step": 1135 }, { "epoch": 0.06, "learning_rate": 4.994902963103224e-05, "loss": 0.8535, "step": 1140 }, { "epoch": 0.06, "learning_rate": 4.994858169569422e-05, "loss": 0.692, "step": 1145 }, { "epoch": 0.06, "learning_rate": 4.994813180272192e-05, "loss": 0.7803, "step": 1150 }, { "epoch": 0.06, "learning_rate": 4.994767995215067e-05, "loss": 0.7417, "step": 1155 }, { "epoch": 0.06, "learning_rate": 4.99472261440159e-05, "loss": 0.7841, "step": 1160 }, { "epoch": 0.06, "learning_rate": 4.9946770378353225e-05, "loss": 0.8977, "step": 1165 }, { "epoch": 0.06, "learning_rate": 4.994631265519842e-05, "loss": 0.7649, "step": 1170 }, { "epoch": 0.06, "learning_rate": 4.994585297458739e-05, "loss": 0.9532, "step": 1175 }, { "epoch": 0.06, "learning_rate": 4.99453913365562e-05, "loss": 0.9149, "step": 1180 }, { "epoch": 0.06, "learning_rate": 4.994492774114109e-05, "loss": 0.7847, "step": 1185 }, { "epoch": 0.06, "learning_rate": 4.994446218837842e-05, "loss": 0.8236, "step": 1190 }, { "epoch": 0.06, "learning_rate": 4.994399467830473e-05, "loss": 0.8189, "step": 1195 }, { "epoch": 0.06, "learning_rate": 4.99435252109567e-05, "loss": 0.9335, "step": 1200 }, { "epoch": 0.06, "learning_rate": 4.994305378637116e-05, "loss": 0.9427, "step": 1205 }, { "epoch": 0.06, "learning_rate": 4.994258040458513e-05, "loss": 0.8016, "step": 1210 }, { "epoch": 0.06, "learning_rate": 4.9942105065635713e-05, "loss": 1.0169, "step": 1215 }, { "epoch": 0.07, "learning_rate": 4.994162776956024e-05, "loss": 0.8399, "step": 1220 }, { "epoch": 0.07, "learning_rate": 4.994114851639615e-05, "loss": 0.9022, "step": 1225 }, { "epoch": 0.07, "learning_rate": 4.9940667306181056e-05, "loss": 0.787, "step": 1230 }, { "epoch": 0.07, "learning_rate": 4.994018413895271e-05, "loss": 0.9417, "step": 1235 }, { "epoch": 0.07, "learning_rate": 4.9939699014749015e-05, "loss": 0.834, "step": 1240 }, { "epoch": 0.07, "learning_rate": 4.993921193360806e-05, "loss": 0.9071, "step": 1245 }, { "epoch": 0.07, "learning_rate": 4.993872289556804e-05, "loss": 0.9327, "step": 1250 }, { "epoch": 0.07, "learning_rate": 4.9938231900667345e-05, "loss": 0.9787, "step": 1255 }, { "epoch": 0.07, "learning_rate": 4.99377389489445e-05, "loss": 0.8714, "step": 1260 }, { "epoch": 0.07, "learning_rate": 4.993724404043819e-05, "loss": 0.995, "step": 1265 }, { "epoch": 0.07, "learning_rate": 4.993674717518723e-05, "loss": 0.7538, "step": 1270 }, { "epoch": 0.07, "learning_rate": 4.9936248353230624e-05, "loss": 0.9466, "step": 1275 }, { "epoch": 0.07, "learning_rate": 4.993574757460751e-05, "loss": 1.0344, "step": 1280 }, { "epoch": 0.07, "learning_rate": 4.993524483935718e-05, "loss": 0.9573, "step": 1285 }, { "epoch": 0.07, "learning_rate": 4.993474014751908e-05, "loss": 0.9206, "step": 1290 }, { "epoch": 0.07, "learning_rate": 4.9934233499132817e-05, "loss": 0.9753, "step": 1295 }, { "epoch": 0.07, "learning_rate": 4.9933724894238146e-05, "loss": 0.7576, "step": 1300 }, { "epoch": 0.07, "learning_rate": 4.993321433287496e-05, "loss": 0.9328, "step": 1305 }, { "epoch": 0.07, "learning_rate": 4.9932701815083346e-05, "loss": 0.8051, "step": 1310 }, { "epoch": 0.07, "learning_rate": 4.99321873409035e-05, "loss": 0.8396, "step": 1315 }, { "epoch": 0.07, "learning_rate": 4.9931670910375805e-05, "loss": 0.8743, "step": 1320 }, { "epoch": 0.07, "learning_rate": 4.993115252354077e-05, "loss": 0.8973, "step": 1325 }, { "epoch": 0.07, "learning_rate": 4.9930632180439085e-05, "loss": 0.8204, "step": 1330 }, { "epoch": 0.07, "learning_rate": 4.993010988111157e-05, "loss": 0.8987, "step": 1335 }, { "epoch": 0.07, "learning_rate": 4.992958562559921e-05, "loss": 0.9644, "step": 1340 }, { "epoch": 0.07, "learning_rate": 4.992905941394315e-05, "loss": 0.8282, "step": 1345 }, { "epoch": 0.07, "learning_rate": 4.9928531246184664e-05, "loss": 0.7461, "step": 1350 }, { "epoch": 0.07, "learning_rate": 4.992800112236521e-05, "loss": 0.9401, "step": 1355 }, { "epoch": 0.07, "learning_rate": 4.9927469042526374e-05, "loss": 0.7926, "step": 1360 }, { "epoch": 0.07, "learning_rate": 4.992693500670992e-05, "loss": 0.9393, "step": 1365 }, { "epoch": 0.07, "learning_rate": 4.9926399014957735e-05, "loss": 0.7676, "step": 1370 }, { "epoch": 0.07, "learning_rate": 4.992586106731189e-05, "loss": 0.882, "step": 1375 }, { "epoch": 0.07, "learning_rate": 4.992532116381459e-05, "loss": 0.8941, "step": 1380 }, { "epoch": 0.07, "learning_rate": 4.9924779304508205e-05, "loss": 0.9527, "step": 1385 }, { "epoch": 0.07, "learning_rate": 4.992423548943524e-05, "loss": 0.9959, "step": 1390 }, { "epoch": 0.07, "learning_rate": 4.9923689718638386e-05, "loss": 0.9233, "step": 1395 }, { "epoch": 0.07, "learning_rate": 4.992314199216046e-05, "loss": 0.8311, "step": 1400 }, { "epoch": 0.08, "learning_rate": 4.992259231004443e-05, "loss": 0.849, "step": 1405 }, { "epoch": 0.08, "learning_rate": 4.9922040672333435e-05, "loss": 0.8877, "step": 1410 }, { "epoch": 0.08, "learning_rate": 4.992148707907077e-05, "loss": 0.8116, "step": 1415 }, { "epoch": 0.08, "learning_rate": 4.9920931530299854e-05, "loss": 0.8211, "step": 1420 }, { "epoch": 0.08, "learning_rate": 4.9920374026064294e-05, "loss": 0.7091, "step": 1425 }, { "epoch": 0.08, "learning_rate": 4.991981456640783e-05, "loss": 0.8518, "step": 1430 }, { "epoch": 0.08, "learning_rate": 4.9919253151374355e-05, "loss": 0.7311, "step": 1435 }, { "epoch": 0.08, "learning_rate": 4.9918689781007946e-05, "loss": 0.9369, "step": 1440 }, { "epoch": 0.08, "learning_rate": 4.9918124455352774e-05, "loss": 0.8322, "step": 1445 }, { "epoch": 0.08, "learning_rate": 4.991755717445322e-05, "loss": 0.8642, "step": 1450 }, { "epoch": 0.08, "learning_rate": 4.99169879383538e-05, "loss": 0.8866, "step": 1455 }, { "epoch": 0.08, "learning_rate": 4.9916416747099166e-05, "loss": 0.9548, "step": 1460 }, { "epoch": 0.08, "learning_rate": 4.991584360073415e-05, "loss": 0.967, "step": 1465 }, { "epoch": 0.08, "learning_rate": 4.991526849930371e-05, "loss": 0.8791, "step": 1470 }, { "epoch": 0.08, "learning_rate": 4.9914691442852976e-05, "loss": 1.0645, "step": 1475 }, { "epoch": 0.08, "learning_rate": 4.991411243142725e-05, "loss": 0.9111, "step": 1480 }, { "epoch": 0.08, "learning_rate": 4.991353146507194e-05, "loss": 0.8932, "step": 1485 }, { "epoch": 0.08, "learning_rate": 4.991294854383264e-05, "loss": 0.8218, "step": 1490 }, { "epoch": 0.08, "learning_rate": 4.991236366775509e-05, "loss": 0.887, "step": 1495 }, { "epoch": 0.08, "learning_rate": 4.991177683688518e-05, "loss": 0.8274, "step": 1500 }, { "epoch": 0.08, "learning_rate": 4.991118805126897e-05, "loss": 0.9471, "step": 1505 }, { "epoch": 0.08, "learning_rate": 4.991059731095265e-05, "loss": 0.7971, "step": 1510 }, { "epoch": 0.08, "learning_rate": 4.9910004615982565e-05, "loss": 0.9093, "step": 1515 }, { "epoch": 0.08, "learning_rate": 4.990940996640524e-05, "loss": 0.8429, "step": 1520 }, { "epoch": 0.08, "learning_rate": 4.990881336226732e-05, "loss": 0.8453, "step": 1525 }, { "epoch": 0.08, "learning_rate": 4.990821480361563e-05, "loss": 0.8797, "step": 1530 }, { "epoch": 0.08, "learning_rate": 4.990761429049713e-05, "loss": 0.9274, "step": 1535 }, { "epoch": 0.08, "learning_rate": 4.990701182295894e-05, "loss": 0.8274, "step": 1540 }, { "epoch": 0.08, "learning_rate": 4.990640740104834e-05, "loss": 0.8599, "step": 1545 }, { "epoch": 0.08, "learning_rate": 4.990580102481275e-05, "loss": 0.7916, "step": 1550 }, { "epoch": 0.08, "learning_rate": 4.990519269429975e-05, "loss": 0.777, "step": 1555 }, { "epoch": 0.08, "learning_rate": 4.990458240955708e-05, "loss": 0.8698, "step": 1560 }, { "epoch": 0.08, "learning_rate": 4.990397017063262e-05, "loss": 0.7045, "step": 1565 }, { "epoch": 0.08, "learning_rate": 4.990335597757442e-05, "loss": 0.897, "step": 1570 }, { "epoch": 0.08, "learning_rate": 4.990273983043067e-05, "loss": 0.9374, "step": 1575 }, { "epoch": 0.08, "learning_rate": 4.9902121729249704e-05, "loss": 0.9631, "step": 1580 }, { "epoch": 0.08, "learning_rate": 4.990150167408004e-05, "loss": 0.9525, "step": 1585 }, { "epoch": 0.09, "learning_rate": 4.9900879664970335e-05, "loss": 0.7914, "step": 1590 }, { "epoch": 0.09, "learning_rate": 4.990025570196937e-05, "loss": 0.8088, "step": 1595 }, { "epoch": 0.09, "learning_rate": 4.989962978512613e-05, "loss": 0.9072, "step": 1600 }, { "epoch": 0.09, "learning_rate": 4.989900191448972e-05, "loss": 0.8763, "step": 1605 }, { "epoch": 0.09, "learning_rate": 4.989837209010941e-05, "loss": 0.8714, "step": 1610 }, { "epoch": 0.09, "learning_rate": 4.9897740312034616e-05, "loss": 0.8351, "step": 1615 }, { "epoch": 0.09, "learning_rate": 4.9897106580314913e-05, "loss": 0.9255, "step": 1620 }, { "epoch": 0.09, "learning_rate": 4.989647089500003e-05, "loss": 0.8401, "step": 1625 }, { "epoch": 0.09, "learning_rate": 4.989583325613984e-05, "loss": 0.7644, "step": 1630 }, { "epoch": 0.09, "learning_rate": 4.989519366378438e-05, "loss": 0.8625, "step": 1635 }, { "epoch": 0.09, "learning_rate": 4.989455211798384e-05, "loss": 1.0019, "step": 1640 }, { "epoch": 0.09, "learning_rate": 4.989390861878856e-05, "loss": 0.8116, "step": 1645 }, { "epoch": 0.09, "learning_rate": 4.989326316624903e-05, "loss": 0.9912, "step": 1650 }, { "epoch": 0.09, "learning_rate": 4.9892615760415905e-05, "loss": 0.7684, "step": 1655 }, { "epoch": 0.09, "learning_rate": 4.9891966401339975e-05, "loss": 0.8937, "step": 1660 }, { "epoch": 0.09, "learning_rate": 4.989131508907219e-05, "loss": 0.946, "step": 1665 }, { "epoch": 0.09, "learning_rate": 4.989066182366367e-05, "loss": 0.8702, "step": 1670 }, { "epoch": 0.09, "learning_rate": 4.9890006605165665e-05, "loss": 0.8557, "step": 1675 }, { "epoch": 0.09, "learning_rate": 4.9889349433629584e-05, "loss": 0.9027, "step": 1680 }, { "epoch": 0.09, "learning_rate": 4.988869030910701e-05, "loss": 0.7927, "step": 1685 }, { "epoch": 0.09, "learning_rate": 4.988802923164965e-05, "loss": 0.839, "step": 1690 }, { "epoch": 0.09, "learning_rate": 4.9887366201309374e-05, "loss": 0.9143, "step": 1695 }, { "epoch": 0.09, "learning_rate": 4.9886701218138205e-05, "loss": 0.934, "step": 1700 }, { "epoch": 0.09, "learning_rate": 4.988603428218834e-05, "loss": 0.8441, "step": 1705 }, { "epoch": 0.09, "learning_rate": 4.988536539351209e-05, "loss": 0.926, "step": 1710 }, { "epoch": 0.09, "learning_rate": 4.988469455216196e-05, "loss": 0.7813, "step": 1715 }, { "epoch": 0.09, "learning_rate": 4.988402175819058e-05, "loss": 0.8033, "step": 1720 }, { "epoch": 0.09, "learning_rate": 4.988334701165074e-05, "loss": 0.7388, "step": 1725 }, { "epoch": 0.09, "learning_rate": 4.988267031259538e-05, "loss": 0.9363, "step": 1730 }, { "epoch": 0.09, "learning_rate": 4.988199166107761e-05, "loss": 0.9499, "step": 1735 }, { "epoch": 0.09, "learning_rate": 4.988131105715068e-05, "loss": 0.9348, "step": 1740 }, { "epoch": 0.09, "learning_rate": 4.9880628500867985e-05, "loss": 0.8518, "step": 1745 }, { "epoch": 0.09, "learning_rate": 4.987994399228309e-05, "loss": 1.003, "step": 1750 }, { "epoch": 0.09, "learning_rate": 4.987925753144971e-05, "loss": 0.9232, "step": 1755 }, { "epoch": 0.09, "learning_rate": 4.987856911842171e-05, "loss": 0.9477, "step": 1760 }, { "epoch": 0.09, "learning_rate": 4.987787875325309e-05, "loss": 0.7504, "step": 1765 }, { "epoch": 0.09, "learning_rate": 4.9877186435998045e-05, "loss": 0.9137, "step": 1770 }, { "epoch": 0.09, "learning_rate": 4.9876492166710884e-05, "loss": 0.8655, "step": 1775 }, { "epoch": 0.1, "learning_rate": 4.987579594544608e-05, "loss": 0.6944, "step": 1780 }, { "epoch": 0.1, "learning_rate": 4.987509777225827e-05, "loss": 0.8168, "step": 1785 }, { "epoch": 0.1, "learning_rate": 4.987439764720225e-05, "loss": 0.9912, "step": 1790 }, { "epoch": 0.1, "learning_rate": 4.9873695570332934e-05, "loss": 0.9112, "step": 1795 }, { "epoch": 0.1, "learning_rate": 4.9872991541705424e-05, "loss": 0.8157, "step": 1800 }, { "epoch": 0.1, "learning_rate": 4.987228556137497e-05, "loss": 0.869, "step": 1805 }, { "epoch": 0.1, "learning_rate": 4.987157762939695e-05, "loss": 0.8406, "step": 1810 }, { "epoch": 0.1, "learning_rate": 4.9870867745826923e-05, "loss": 0.8385, "step": 1815 }, { "epoch": 0.1, "learning_rate": 4.987015591072058e-05, "loss": 0.9317, "step": 1820 }, { "epoch": 0.1, "learning_rate": 4.9869442124133805e-05, "loss": 0.8695, "step": 1825 }, { "epoch": 0.1, "learning_rate": 4.9868726386122575e-05, "loss": 0.9023, "step": 1830 }, { "epoch": 0.1, "learning_rate": 4.986800869674307e-05, "loss": 0.6973, "step": 1835 }, { "epoch": 0.1, "learning_rate": 4.9867289056051605e-05, "loss": 0.9736, "step": 1840 }, { "epoch": 0.1, "learning_rate": 4.986656746410464e-05, "loss": 0.806, "step": 1845 }, { "epoch": 0.1, "learning_rate": 4.986584392095879e-05, "loss": 0.6967, "step": 1850 }, { "epoch": 0.1, "learning_rate": 4.9865118426670845e-05, "loss": 0.8629, "step": 1855 }, { "epoch": 0.1, "learning_rate": 4.986439098129772e-05, "loss": 0.8126, "step": 1860 }, { "epoch": 0.1, "learning_rate": 4.9863661584896506e-05, "loss": 0.8886, "step": 1865 }, { "epoch": 0.1, "learning_rate": 4.9862930237524425e-05, "loss": 1.0021, "step": 1870 }, { "epoch": 0.1, "learning_rate": 4.986219693923887e-05, "loss": 0.7813, "step": 1875 }, { "epoch": 0.1, "learning_rate": 4.9861461690097377e-05, "loss": 0.719, "step": 1880 }, { "epoch": 0.1, "learning_rate": 4.9860724490157645e-05, "loss": 0.7863, "step": 1885 }, { "epoch": 0.1, "learning_rate": 4.9859985339477516e-05, "loss": 0.8206, "step": 1890 }, { "epoch": 0.1, "learning_rate": 4.985924423811499e-05, "loss": 0.8631, "step": 1895 }, { "epoch": 0.1, "learning_rate": 4.985850118612821e-05, "loss": 1.0037, "step": 1900 }, { "epoch": 0.1, "learning_rate": 4.9857756183575494e-05, "loss": 0.9073, "step": 1905 }, { "epoch": 0.1, "learning_rate": 4.985700923051529e-05, "loss": 0.8824, "step": 1910 }, { "epoch": 0.1, "learning_rate": 4.9856260327006213e-05, "loss": 0.8418, "step": 1915 }, { "epoch": 0.1, "learning_rate": 4.985550947310703e-05, "loss": 0.8256, "step": 1920 }, { "epoch": 0.1, "learning_rate": 4.985475666887666e-05, "loss": 0.8181, "step": 1925 }, { "epoch": 0.1, "learning_rate": 4.985400191437416e-05, "loss": 0.8676, "step": 1930 }, { "epoch": 0.1, "learning_rate": 4.985324520965876e-05, "loss": 0.9315, "step": 1935 }, { "epoch": 0.1, "learning_rate": 4.985248655478984e-05, "loss": 0.9193, "step": 1940 }, { "epoch": 0.1, "learning_rate": 4.985172594982693e-05, "loss": 0.6758, "step": 1945 }, { "epoch": 0.1, "learning_rate": 4.985096339482971e-05, "loss": 1.0395, "step": 1950 }, { "epoch": 0.1, "learning_rate": 4.9850198889858005e-05, "loss": 0.9421, "step": 1955 }, { "epoch": 0.1, "learning_rate": 4.984943243497182e-05, "loss": 0.8176, "step": 1960 }, { "epoch": 0.11, "learning_rate": 4.984866403023129e-05, "loss": 0.8059, "step": 1965 }, { "epoch": 0.11, "learning_rate": 4.9847893675696697e-05, "loss": 0.7081, "step": 1970 }, { "epoch": 0.11, "learning_rate": 4.9847121371428504e-05, "loss": 1.0082, "step": 1975 }, { "epoch": 0.11, "learning_rate": 4.984634711748731e-05, "loss": 0.8226, "step": 1980 }, { "epoch": 0.11, "learning_rate": 4.984557091393387e-05, "loss": 0.8726, "step": 1985 }, { "epoch": 0.11, "learning_rate": 4.9844792760829075e-05, "loss": 0.8124, "step": 1990 }, { "epoch": 0.11, "learning_rate": 4.9844012658234e-05, "loss": 0.9257, "step": 1995 }, { "epoch": 0.11, "learning_rate": 4.9843230606209845e-05, "loss": 0.8869, "step": 2000 }, { "epoch": 0.11, "learning_rate": 4.984244660481798e-05, "loss": 0.8605, "step": 2005 }, { "epoch": 0.11, "learning_rate": 4.9841660654119924e-05, "loss": 0.8098, "step": 2010 }, { "epoch": 0.11, "learning_rate": 4.9840872754177356e-05, "loss": 0.9607, "step": 2015 }, { "epoch": 0.11, "learning_rate": 4.984008290505209e-05, "loss": 0.9345, "step": 2020 }, { "epoch": 0.11, "learning_rate": 4.9839291106806095e-05, "loss": 0.7879, "step": 2025 }, { "epoch": 0.11, "learning_rate": 4.9838497359501524e-05, "loss": 0.7783, "step": 2030 }, { "epoch": 0.11, "learning_rate": 4.9837701663200636e-05, "loss": 0.7527, "step": 2035 }, { "epoch": 0.11, "learning_rate": 4.983690401796588e-05, "loss": 0.8556, "step": 2040 }, { "epoch": 0.11, "learning_rate": 4.983610442385984e-05, "loss": 0.8758, "step": 2045 }, { "epoch": 0.11, "learning_rate": 4.9835302880945266e-05, "loss": 0.9996, "step": 2050 }, { "epoch": 0.11, "learning_rate": 4.9834499389285036e-05, "loss": 0.9905, "step": 2055 }, { "epoch": 0.11, "learning_rate": 4.9833693948942217e-05, "loss": 0.8837, "step": 2060 }, { "epoch": 0.11, "learning_rate": 4.983288655998e-05, "loss": 0.7579, "step": 2065 }, { "epoch": 0.11, "learning_rate": 4.983207722246173e-05, "loss": 0.9043, "step": 2070 }, { "epoch": 0.11, "learning_rate": 4.983126593645092e-05, "loss": 0.9022, "step": 2075 }, { "epoch": 0.11, "learning_rate": 4.9830452702011236e-05, "loss": 1.0319, "step": 2080 }, { "epoch": 0.11, "learning_rate": 4.9829637519206486e-05, "loss": 0.8624, "step": 2085 }, { "epoch": 0.11, "learning_rate": 4.9828820388100625e-05, "loss": 0.8045, "step": 2090 }, { "epoch": 0.11, "learning_rate": 4.982800130875778e-05, "loss": 0.9147, "step": 2095 }, { "epoch": 0.11, "learning_rate": 4.982718028124223e-05, "loss": 0.9279, "step": 2100 }, { "epoch": 0.11, "learning_rate": 4.982635730561837e-05, "loss": 0.8517, "step": 2105 }, { "epoch": 0.11, "learning_rate": 4.98255323819508e-05, "loss": 0.6737, "step": 2110 }, { "epoch": 0.11, "learning_rate": 4.9824705510304247e-05, "loss": 0.7965, "step": 2115 }, { "epoch": 0.11, "learning_rate": 4.982387669074359e-05, "loss": 0.7378, "step": 2120 }, { "epoch": 0.11, "learning_rate": 4.9823045923333855e-05, "loss": 0.8486, "step": 2125 }, { "epoch": 0.11, "learning_rate": 4.982221320814024e-05, "loss": 0.8096, "step": 2130 }, { "epoch": 0.11, "learning_rate": 4.982137854522809e-05, "loss": 0.7806, "step": 2135 }, { "epoch": 0.11, "learning_rate": 4.982054193466289e-05, "loss": 0.8132, "step": 2140 }, { "epoch": 0.11, "learning_rate": 4.981970337651029e-05, "loss": 0.7834, "step": 2145 }, { "epoch": 0.12, "learning_rate": 4.981886287083607e-05, "loss": 0.8362, "step": 2150 }, { "epoch": 0.12, "learning_rate": 4.981802041770621e-05, "loss": 0.9176, "step": 2155 }, { "epoch": 0.12, "learning_rate": 4.981717601718681e-05, "loss": 0.8825, "step": 2160 }, { "epoch": 0.12, "learning_rate": 4.981632966934411e-05, "loss": 0.9248, "step": 2165 }, { "epoch": 0.12, "learning_rate": 4.981548137424453e-05, "loss": 0.7836, "step": 2170 }, { "epoch": 0.12, "learning_rate": 4.9814631131954635e-05, "loss": 0.8113, "step": 2175 }, { "epoch": 0.12, "learning_rate": 4.981377894254114e-05, "loss": 0.8362, "step": 2180 }, { "epoch": 0.12, "learning_rate": 4.981292480607091e-05, "loss": 0.8749, "step": 2185 }, { "epoch": 0.12, "learning_rate": 4.981206872261097e-05, "loss": 0.9427, "step": 2190 }, { "epoch": 0.12, "learning_rate": 4.98112106922285e-05, "loss": 0.8892, "step": 2195 }, { "epoch": 0.12, "learning_rate": 4.981035071499081e-05, "loss": 0.8796, "step": 2200 }, { "epoch": 0.12, "learning_rate": 4.980948879096539e-05, "loss": 0.9027, "step": 2205 }, { "epoch": 0.12, "learning_rate": 4.980862492021987e-05, "loss": 0.8985, "step": 2210 }, { "epoch": 0.12, "learning_rate": 4.9807759102822036e-05, "loss": 0.8132, "step": 2215 }, { "epoch": 0.12, "learning_rate": 4.980689133883983e-05, "loss": 0.7887, "step": 2220 }, { "epoch": 0.12, "learning_rate": 4.980602162834135e-05, "loss": 0.8031, "step": 2225 }, { "epoch": 0.12, "learning_rate": 4.9805149971394815e-05, "loss": 0.6112, "step": 2230 }, { "epoch": 0.12, "learning_rate": 4.9804276368068634e-05, "loss": 0.7819, "step": 2235 }, { "epoch": 0.12, "learning_rate": 4.980340081843137e-05, "loss": 0.8431, "step": 2240 }, { "epoch": 0.12, "learning_rate": 4.98025233225517e-05, "loss": 0.8264, "step": 2245 }, { "epoch": 0.12, "learning_rate": 4.980164388049849e-05, "loss": 0.8417, "step": 2250 }, { "epoch": 0.12, "learning_rate": 4.980076249234075e-05, "loss": 0.8623, "step": 2255 }, { "epoch": 0.12, "learning_rate": 4.979987915814763e-05, "loss": 0.7658, "step": 2260 }, { "epoch": 0.12, "learning_rate": 4.979899387798846e-05, "loss": 0.8775, "step": 2265 }, { "epoch": 0.12, "learning_rate": 4.979810665193269e-05, "loss": 0.8722, "step": 2270 }, { "epoch": 0.12, "learning_rate": 4.979721748004993e-05, "loss": 0.7641, "step": 2275 }, { "epoch": 0.12, "learning_rate": 4.979632636240997e-05, "loss": 0.7558, "step": 2280 }, { "epoch": 0.12, "learning_rate": 4.979543329908273e-05, "loss": 0.8515, "step": 2285 }, { "epoch": 0.12, "learning_rate": 4.979453829013827e-05, "loss": 0.8285, "step": 2290 }, { "epoch": 0.12, "learning_rate": 4.979364133564684e-05, "loss": 0.9257, "step": 2295 }, { "epoch": 0.12, "learning_rate": 4.97927424356788e-05, "loss": 0.7933, "step": 2300 }, { "epoch": 0.12, "learning_rate": 4.97918415903047e-05, "loss": 0.7068, "step": 2305 }, { "epoch": 0.12, "learning_rate": 4.979093879959521e-05, "loss": 0.7912, "step": 2310 }, { "epoch": 0.12, "learning_rate": 4.9790034063621196e-05, "loss": 0.7526, "step": 2315 }, { "epoch": 0.12, "learning_rate": 4.978912738245362e-05, "loss": 0.8986, "step": 2320 }, { "epoch": 0.12, "learning_rate": 4.978821875616364e-05, "loss": 0.8767, "step": 2325 }, { "epoch": 0.12, "learning_rate": 4.9787308184822554e-05, "loss": 0.8165, "step": 2330 }, { "epoch": 0.12, "learning_rate": 4.9786395668501815e-05, "loss": 0.7368, "step": 2335 }, { "epoch": 0.13, "learning_rate": 4.978548120727302e-05, "loss": 0.9021, "step": 2340 }, { "epoch": 0.13, "learning_rate": 4.978456480120792e-05, "loss": 0.8389, "step": 2345 }, { "epoch": 0.13, "learning_rate": 4.978364645037843e-05, "loss": 0.7349, "step": 2350 }, { "epoch": 0.13, "learning_rate": 4.97827261548566e-05, "loss": 0.7653, "step": 2355 }, { "epoch": 0.13, "learning_rate": 4.9781803914714654e-05, "loss": 0.7587, "step": 2360 }, { "epoch": 0.13, "learning_rate": 4.978087973002495e-05, "loss": 0.8199, "step": 2365 }, { "epoch": 0.13, "learning_rate": 4.9779953600860005e-05, "loss": 0.7978, "step": 2370 }, { "epoch": 0.13, "learning_rate": 4.97790255272925e-05, "loss": 1.0208, "step": 2375 }, { "epoch": 0.13, "learning_rate": 4.977809550939525e-05, "loss": 0.7798, "step": 2380 }, { "epoch": 0.13, "learning_rate": 4.977716354724122e-05, "loss": 0.8695, "step": 2385 }, { "epoch": 0.13, "learning_rate": 4.977622964090356e-05, "loss": 0.8421, "step": 2390 }, { "epoch": 0.13, "learning_rate": 4.9775293790455536e-05, "loss": 0.7212, "step": 2395 }, { "epoch": 0.13, "learning_rate": 4.977435599597058e-05, "loss": 0.8967, "step": 2400 }, { "epoch": 0.13, "learning_rate": 4.9773416257522286e-05, "loss": 0.895, "step": 2405 }, { "epoch": 0.13, "learning_rate": 4.977247457518439e-05, "loss": 0.9811, "step": 2410 }, { "epoch": 0.13, "learning_rate": 4.9771530949030776e-05, "loss": 0.8655, "step": 2415 }, { "epoch": 0.13, "learning_rate": 4.97705853791355e-05, "loss": 0.9284, "step": 2420 }, { "epoch": 0.13, "learning_rate": 4.976963786557274e-05, "loss": 0.675, "step": 2425 }, { "epoch": 0.13, "learning_rate": 4.976868840841686e-05, "loss": 0.8499, "step": 2430 }, { "epoch": 0.13, "learning_rate": 4.976773700774236e-05, "loss": 1.0544, "step": 2435 }, { "epoch": 0.13, "learning_rate": 4.9766783663623886e-05, "loss": 0.9486, "step": 2440 }, { "epoch": 0.13, "learning_rate": 4.976582837613624e-05, "loss": 0.663, "step": 2445 }, { "epoch": 0.13, "learning_rate": 4.97648711453544e-05, "loss": 0.7747, "step": 2450 }, { "epoch": 0.13, "learning_rate": 4.9763911971353447e-05, "loss": 0.7663, "step": 2455 }, { "epoch": 0.13, "learning_rate": 4.9762950854208666e-05, "loss": 0.6607, "step": 2460 }, { "epoch": 0.13, "learning_rate": 4.976198779399548e-05, "loss": 0.813, "step": 2465 }, { "epoch": 0.13, "learning_rate": 4.976102279078944e-05, "loss": 0.7706, "step": 2470 }, { "epoch": 0.13, "learning_rate": 4.976005584466626e-05, "loss": 0.8038, "step": 2475 }, { "epoch": 0.13, "learning_rate": 4.975908695570183e-05, "loss": 0.8027, "step": 2480 }, { "epoch": 0.13, "learning_rate": 4.9758116123972173e-05, "loss": 0.8031, "step": 2485 }, { "epoch": 0.13, "learning_rate": 4.975714334955346e-05, "loss": 0.9296, "step": 2490 }, { "epoch": 0.13, "learning_rate": 4.975616863252203e-05, "loss": 0.9807, "step": 2495 }, { "epoch": 0.13, "learning_rate": 4.9755191972954366e-05, "loss": 0.9434, "step": 2500 }, { "epoch": 0.13, "learning_rate": 4.9754213370927086e-05, "loss": 0.8829, "step": 2505 }, { "epoch": 0.13, "learning_rate": 4.975323282651701e-05, "loss": 1.0117, "step": 2510 }, { "epoch": 0.13, "learning_rate": 4.975225033980104e-05, "loss": 1.1015, "step": 2515 }, { "epoch": 0.13, "learning_rate": 4.9751265910856294e-05, "loss": 0.9377, "step": 2520 }, { "epoch": 0.14, "learning_rate": 4.975027953976e-05, "loss": 0.7232, "step": 2525 }, { "epoch": 0.14, "learning_rate": 4.9749291226589584e-05, "loss": 0.7432, "step": 2530 }, { "epoch": 0.14, "learning_rate": 4.974830097142257e-05, "loss": 0.9603, "step": 2535 }, { "epoch": 0.14, "learning_rate": 4.974730877433666e-05, "loss": 0.6104, "step": 2540 }, { "epoch": 0.14, "learning_rate": 4.974631463540973e-05, "loss": 0.7624, "step": 2545 }, { "epoch": 0.14, "learning_rate": 4.974531855471976e-05, "loss": 0.893, "step": 2550 }, { "epoch": 0.14, "learning_rate": 4.974432053234493e-05, "loss": 0.7997, "step": 2555 }, { "epoch": 0.14, "learning_rate": 4.9743320568363536e-05, "loss": 0.8702, "step": 2560 }, { "epoch": 0.14, "learning_rate": 4.974231866285406e-05, "loss": 0.9106, "step": 2565 }, { "epoch": 0.14, "learning_rate": 4.9741314815895104e-05, "loss": 0.7824, "step": 2570 }, { "epoch": 0.14, "learning_rate": 4.9740309027565434e-05, "loss": 0.792, "step": 2575 }, { "epoch": 0.14, "learning_rate": 4.973930129794398e-05, "loss": 0.8312, "step": 2580 }, { "epoch": 0.14, "learning_rate": 4.9738291627109814e-05, "loss": 0.9209, "step": 2585 }, { "epoch": 0.14, "learning_rate": 4.9737280015142165e-05, "loss": 0.7518, "step": 2590 }, { "epoch": 0.14, "learning_rate": 4.9736266462120394e-05, "loss": 0.845, "step": 2595 }, { "epoch": 0.14, "learning_rate": 4.9735250968124054e-05, "loss": 0.9205, "step": 2600 }, { "epoch": 0.14, "learning_rate": 4.973423353323281e-05, "loss": 0.7602, "step": 2605 }, { "epoch": 0.14, "learning_rate": 4.973321415752651e-05, "loss": 0.7718, "step": 2610 }, { "epoch": 0.14, "learning_rate": 4.973219284108513e-05, "loss": 0.9397, "step": 2615 }, { "epoch": 0.14, "learning_rate": 4.9731169583988815e-05, "loss": 0.5993, "step": 2620 }, { "epoch": 0.14, "learning_rate": 4.973014438631786e-05, "loss": 0.9764, "step": 2625 }, { "epoch": 0.14, "learning_rate": 4.97291172481527e-05, "loss": 0.8947, "step": 2630 }, { "epoch": 0.14, "learning_rate": 4.972808816957394e-05, "loss": 0.7173, "step": 2635 }, { "epoch": 0.14, "learning_rate": 4.9727057150662315e-05, "loss": 0.7808, "step": 2640 }, { "epoch": 0.14, "learning_rate": 4.9726024191498744e-05, "loss": 0.8548, "step": 2645 }, { "epoch": 0.14, "learning_rate": 4.972498929216427e-05, "loss": 0.771, "step": 2650 }, { "epoch": 0.14, "learning_rate": 4.9723952452740095e-05, "loss": 0.7493, "step": 2655 }, { "epoch": 0.14, "learning_rate": 4.972291367330759e-05, "loss": 0.8956, "step": 2660 }, { "epoch": 0.14, "learning_rate": 4.9721872953948244e-05, "loss": 1.0156, "step": 2665 }, { "epoch": 0.14, "learning_rate": 4.972083029474374e-05, "loss": 0.9684, "step": 2670 }, { "epoch": 0.14, "learning_rate": 4.971978569577587e-05, "loss": 0.7829, "step": 2675 }, { "epoch": 0.14, "learning_rate": 4.9718739157126627e-05, "loss": 0.7946, "step": 2680 }, { "epoch": 0.14, "learning_rate": 4.97176906788781e-05, "loss": 0.8013, "step": 2685 }, { "epoch": 0.14, "learning_rate": 4.971664026111259e-05, "loss": 0.7803, "step": 2690 }, { "epoch": 0.14, "learning_rate": 4.97155879039125e-05, "loss": 0.8442, "step": 2695 }, { "epoch": 0.14, "learning_rate": 4.9714533607360404e-05, "loss": 0.9028, "step": 2700 }, { "epoch": 0.14, "learning_rate": 4.971347737153904e-05, "loss": 0.945, "step": 2705 }, { "epoch": 0.14, "learning_rate": 4.971241919653128e-05, "loss": 0.8276, "step": 2710 }, { "epoch": 0.15, "learning_rate": 4.9711359082420156e-05, "loss": 0.841, "step": 2715 }, { "epoch": 0.15, "learning_rate": 4.971029702928886e-05, "loss": 0.9866, "step": 2720 }, { "epoch": 0.15, "learning_rate": 4.9709233037220713e-05, "loss": 0.7829, "step": 2725 }, { "epoch": 0.15, "learning_rate": 4.970816710629922e-05, "loss": 0.8162, "step": 2730 }, { "epoch": 0.15, "learning_rate": 4.9707099236608e-05, "loss": 0.8764, "step": 2735 }, { "epoch": 0.15, "learning_rate": 4.970602942823087e-05, "loss": 0.7875, "step": 2740 }, { "epoch": 0.15, "learning_rate": 4.970495768125176e-05, "loss": 0.863, "step": 2745 }, { "epoch": 0.15, "learning_rate": 4.9703883995754766e-05, "loss": 0.9755, "step": 2750 }, { "epoch": 0.15, "learning_rate": 4.9702808371824136e-05, "loss": 0.7829, "step": 2755 }, { "epoch": 0.15, "learning_rate": 4.9701730809544286e-05, "loss": 0.7883, "step": 2760 }, { "epoch": 0.15, "learning_rate": 4.970065130899974e-05, "loss": 0.9066, "step": 2765 }, { "epoch": 0.15, "learning_rate": 4.9699569870275236e-05, "loss": 0.8159, "step": 2770 }, { "epoch": 0.15, "learning_rate": 4.969848649345561e-05, "loss": 0.8148, "step": 2775 }, { "epoch": 0.15, "learning_rate": 4.969740117862587e-05, "loss": 0.7785, "step": 2780 }, { "epoch": 0.15, "learning_rate": 4.969631392587119e-05, "loss": 0.8492, "step": 2785 }, { "epoch": 0.15, "learning_rate": 4.9695224735276874e-05, "loss": 0.8909, "step": 2790 }, { "epoch": 0.15, "learning_rate": 4.9694133606928386e-05, "loss": 0.8065, "step": 2795 }, { "epoch": 0.15, "learning_rate": 4.9693040540911353e-05, "loss": 1.0437, "step": 2800 }, { "epoch": 0.15, "learning_rate": 4.9691945537311536e-05, "loss": 0.7663, "step": 2805 }, { "epoch": 0.15, "learning_rate": 4.969084859621486e-05, "loss": 0.761, "step": 2810 }, { "epoch": 0.15, "learning_rate": 4.968974971770739e-05, "loss": 0.8197, "step": 2815 }, { "epoch": 0.15, "learning_rate": 4.9688648901875365e-05, "loss": 0.8564, "step": 2820 }, { "epoch": 0.15, "learning_rate": 4.968754614880515e-05, "loss": 0.7359, "step": 2825 }, { "epoch": 0.15, "learning_rate": 4.968644145858328e-05, "loss": 0.8015, "step": 2830 }, { "epoch": 0.15, "learning_rate": 4.968533483129644e-05, "loss": 0.7678, "step": 2835 }, { "epoch": 0.15, "learning_rate": 4.9684226267031466e-05, "loss": 0.7663, "step": 2840 }, { "epoch": 0.15, "learning_rate": 4.968311576587533e-05, "loss": 0.8433, "step": 2845 }, { "epoch": 0.15, "learning_rate": 4.968200332791518e-05, "loss": 0.8641, "step": 2850 }, { "epoch": 0.15, "learning_rate": 4.96808889532383e-05, "loss": 0.7179, "step": 2855 }, { "epoch": 0.15, "learning_rate": 4.9679772641932134e-05, "loss": 0.7958, "step": 2860 }, { "epoch": 0.15, "learning_rate": 4.967865439408428e-05, "loss": 0.7732, "step": 2865 }, { "epoch": 0.15, "learning_rate": 4.9677534209782475e-05, "loss": 0.7677, "step": 2870 }, { "epoch": 0.15, "learning_rate": 4.967641208911461e-05, "loss": 0.954, "step": 2875 }, { "epoch": 0.15, "learning_rate": 4.967528803216876e-05, "loss": 0.9279, "step": 2880 }, { "epoch": 0.15, "learning_rate": 4.96741620390331e-05, "loss": 0.8701, "step": 2885 }, { "epoch": 0.15, "learning_rate": 4.9673034109796e-05, "loss": 0.813, "step": 2890 }, { "epoch": 0.15, "learning_rate": 4.967190424454595e-05, "loss": 0.9161, "step": 2895 }, { "epoch": 0.16, "learning_rate": 4.967077244337161e-05, "loss": 0.8249, "step": 2900 }, { "epoch": 0.16, "learning_rate": 4.966963870636181e-05, "loss": 0.8396, "step": 2905 }, { "epoch": 0.16, "learning_rate": 4.9668503033605485e-05, "loss": 0.792, "step": 2910 }, { "epoch": 0.16, "learning_rate": 4.966736542519176e-05, "loss": 0.706, "step": 2915 }, { "epoch": 0.16, "learning_rate": 4.966622588120989e-05, "loss": 0.7794, "step": 2920 }, { "epoch": 0.16, "learning_rate": 4.966508440174931e-05, "loss": 0.8807, "step": 2925 }, { "epoch": 0.16, "learning_rate": 4.966394098689956e-05, "loss": 0.8132, "step": 2930 }, { "epoch": 0.16, "learning_rate": 4.966279563675038e-05, "loss": 0.7731, "step": 2935 }, { "epoch": 0.16, "learning_rate": 4.966164835139164e-05, "loss": 0.8436, "step": 2940 }, { "epoch": 0.16, "learning_rate": 4.966049913091336e-05, "loss": 0.7004, "step": 2945 }, { "epoch": 0.16, "learning_rate": 4.9659347975405726e-05, "loss": 0.8035, "step": 2950 }, { "epoch": 0.16, "learning_rate": 4.9658194884959046e-05, "loss": 0.8535, "step": 2955 }, { "epoch": 0.16, "learning_rate": 4.965703985966381e-05, "loss": 0.7824, "step": 2960 }, { "epoch": 0.16, "learning_rate": 4.965588289961066e-05, "loss": 0.8664, "step": 2965 }, { "epoch": 0.16, "learning_rate": 4.965472400489035e-05, "loss": 0.7619, "step": 2970 }, { "epoch": 0.16, "learning_rate": 4.965356317559384e-05, "loss": 0.8276, "step": 2975 }, { "epoch": 0.16, "learning_rate": 4.965240041181222e-05, "loss": 0.8057, "step": 2980 }, { "epoch": 0.16, "learning_rate": 4.96512357136367e-05, "loss": 0.909, "step": 2985 }, { "epoch": 0.16, "learning_rate": 4.96500690811587e-05, "loss": 0.8557, "step": 2990 }, { "epoch": 0.16, "learning_rate": 4.9648900514469745e-05, "loss": 0.765, "step": 2995 }, { "epoch": 0.16, "learning_rate": 4.964773001366153e-05, "loss": 1.0673, "step": 3000 }, { "epoch": 0.16, "learning_rate": 4.96465575788259e-05, "loss": 0.8582, "step": 3005 }, { "epoch": 0.16, "learning_rate": 4.9645383210054865e-05, "loss": 0.9613, "step": 3010 }, { "epoch": 0.16, "learning_rate": 4.9644206907440563e-05, "loss": 0.8421, "step": 3015 }, { "epoch": 0.16, "learning_rate": 4.9643028671075285e-05, "loss": 0.9535, "step": 3020 }, { "epoch": 0.16, "learning_rate": 4.96418485010515e-05, "loss": 0.9872, "step": 3025 }, { "epoch": 0.16, "learning_rate": 4.964066639746181e-05, "loss": 0.9017, "step": 3030 }, { "epoch": 0.16, "learning_rate": 4.963948236039897e-05, "loss": 0.8539, "step": 3035 }, { "epoch": 0.16, "learning_rate": 4.963829638995588e-05, "loss": 0.8314, "step": 3040 }, { "epoch": 0.16, "learning_rate": 4.9637108486225604e-05, "loss": 0.8791, "step": 3045 }, { "epoch": 0.16, "learning_rate": 4.963591864930135e-05, "loss": 0.7487, "step": 3050 }, { "epoch": 0.16, "learning_rate": 4.963472687927649e-05, "loss": 0.8403, "step": 3055 }, { "epoch": 0.16, "learning_rate": 4.9633533176244526e-05, "loss": 0.7483, "step": 3060 }, { "epoch": 0.16, "learning_rate": 4.963233754029913e-05, "loss": 0.8811, "step": 3065 }, { "epoch": 0.16, "learning_rate": 4.9631139971534126e-05, "loss": 0.8106, "step": 3070 }, { "epoch": 0.16, "learning_rate": 4.962994047004348e-05, "loss": 0.8174, "step": 3075 }, { "epoch": 0.16, "learning_rate": 4.96287390359213e-05, "loss": 0.7392, "step": 3080 }, { "epoch": 0.17, "learning_rate": 4.962753566926187e-05, "loss": 0.7308, "step": 3085 }, { "epoch": 0.17, "learning_rate": 4.962633037015961e-05, "loss": 0.7862, "step": 3090 }, { "epoch": 0.17, "learning_rate": 4.962512313870911e-05, "loss": 0.7249, "step": 3095 }, { "epoch": 0.17, "learning_rate": 4.962391397500508e-05, "loss": 0.7166, "step": 3100 }, { "epoch": 0.17, "learning_rate": 4.96227028791424e-05, "loss": 0.861, "step": 3105 }, { "epoch": 0.17, "learning_rate": 4.962148985121612e-05, "loss": 0.8061, "step": 3110 }, { "epoch": 0.17, "learning_rate": 4.962027489132139e-05, "loss": 0.7095, "step": 3115 }, { "epoch": 0.17, "learning_rate": 4.961905799955357e-05, "loss": 0.839, "step": 3120 }, { "epoch": 0.17, "learning_rate": 4.961783917600815e-05, "loss": 0.8933, "step": 3125 }, { "epoch": 0.17, "learning_rate": 4.9616618420780745e-05, "loss": 0.8229, "step": 3130 }, { "epoch": 0.17, "learning_rate": 4.961539573396715e-05, "loss": 0.8342, "step": 3135 }, { "epoch": 0.17, "learning_rate": 4.961417111566332e-05, "loss": 0.7522, "step": 3140 }, { "epoch": 0.17, "learning_rate": 4.961294456596533e-05, "loss": 0.7861, "step": 3145 }, { "epoch": 0.17, "learning_rate": 4.961171608496943e-05, "loss": 0.7584, "step": 3150 }, { "epoch": 0.17, "learning_rate": 4.961048567277201e-05, "loss": 0.8426, "step": 3155 }, { "epoch": 0.17, "learning_rate": 4.960925332946963e-05, "loss": 0.8247, "step": 3160 }, { "epoch": 0.17, "learning_rate": 4.960801905515897e-05, "loss": 0.7422, "step": 3165 }, { "epoch": 0.17, "learning_rate": 4.960678284993689e-05, "loss": 0.7993, "step": 3170 }, { "epoch": 0.17, "learning_rate": 4.96055447139004e-05, "loss": 0.746, "step": 3175 }, { "epoch": 0.17, "learning_rate": 4.960430464714663e-05, "loss": 0.8515, "step": 3180 }, { "epoch": 0.17, "learning_rate": 4.9603062649772895e-05, "loss": 0.7546, "step": 3185 }, { "epoch": 0.17, "learning_rate": 4.960181872187666e-05, "loss": 0.7612, "step": 3190 }, { "epoch": 0.17, "learning_rate": 4.9600572863555515e-05, "loss": 0.8212, "step": 3195 }, { "epoch": 0.17, "learning_rate": 4.959932507490723e-05, "loss": 0.8165, "step": 3200 }, { "epoch": 0.17, "learning_rate": 4.9598075356029713e-05, "loss": 0.8134, "step": 3205 }, { "epoch": 0.17, "learning_rate": 4.9596823707021026e-05, "loss": 0.8584, "step": 3210 }, { "epoch": 0.17, "learning_rate": 4.9595570127979375e-05, "loss": 0.8665, "step": 3215 }, { "epoch": 0.17, "learning_rate": 4.959431461900313e-05, "loss": 0.8252, "step": 3220 }, { "epoch": 0.17, "learning_rate": 4.95930571801908e-05, "loss": 0.7456, "step": 3225 }, { "epoch": 0.17, "learning_rate": 4.959179781164107e-05, "loss": 0.7836, "step": 3230 }, { "epoch": 0.17, "learning_rate": 4.959053651345275e-05, "loss": 0.9068, "step": 3235 }, { "epoch": 0.17, "learning_rate": 4.958927328572479e-05, "loss": 0.9326, "step": 3240 }, { "epoch": 0.17, "learning_rate": 4.95882613143405e-05, "loss": 0.8543, "step": 3245 }, { "epoch": 0.17, "learning_rate": 4.958699461369112e-05, "loss": 0.845, "step": 3250 }, { "epoch": 0.17, "learning_rate": 4.958572598378003e-05, "loss": 0.765, "step": 3255 }, { "epoch": 0.17, "learning_rate": 4.958445542470679e-05, "loss": 0.8575, "step": 3260 }, { "epoch": 0.17, "learning_rate": 4.958318293657108e-05, "loss": 0.8214, "step": 3265 }, { "epoch": 0.17, "learning_rate": 4.9581908519472766e-05, "loss": 0.9318, "step": 3270 }, { "epoch": 0.18, "learning_rate": 4.958063217351183e-05, "loss": 0.8527, "step": 3275 }, { "epoch": 0.18, "learning_rate": 4.9579353898788435e-05, "loss": 0.9015, "step": 3280 }, { "epoch": 0.18, "learning_rate": 4.957807369540289e-05, "loss": 0.9015, "step": 3285 }, { "epoch": 0.18, "learning_rate": 4.9576791563455625e-05, "loss": 0.789, "step": 3290 }, { "epoch": 0.18, "learning_rate": 4.957550750304727e-05, "loss": 0.8802, "step": 3295 }, { "epoch": 0.18, "learning_rate": 4.957422151427856e-05, "loss": 0.8692, "step": 3300 }, { "epoch": 0.18, "learning_rate": 4.9572933597250424e-05, "loss": 0.9522, "step": 3305 }, { "epoch": 0.18, "learning_rate": 4.9571643752063894e-05, "loss": 0.8021, "step": 3310 }, { "epoch": 0.18, "learning_rate": 4.95703519788202e-05, "loss": 0.8531, "step": 3315 }, { "epoch": 0.18, "learning_rate": 4.95690582776207e-05, "loss": 1.0268, "step": 3320 }, { "epoch": 0.18, "learning_rate": 4.95677626485669e-05, "loss": 0.6887, "step": 3325 }, { "epoch": 0.18, "learning_rate": 4.956646509176047e-05, "loss": 0.6657, "step": 3330 }, { "epoch": 0.18, "learning_rate": 4.9565165607303225e-05, "loss": 0.6664, "step": 3335 }, { "epoch": 0.18, "learning_rate": 4.956386419529713e-05, "loss": 0.8534, "step": 3340 }, { "epoch": 0.18, "learning_rate": 4.9562560855844295e-05, "loss": 0.7976, "step": 3345 }, { "epoch": 0.18, "learning_rate": 4.9561255589047e-05, "loss": 0.7612, "step": 3350 }, { "epoch": 0.18, "learning_rate": 4.955994839500766e-05, "loss": 0.8446, "step": 3355 }, { "epoch": 0.18, "learning_rate": 4.955863927382885e-05, "loss": 0.7642, "step": 3360 }, { "epoch": 0.18, "learning_rate": 4.955732822561329e-05, "loss": 0.7925, "step": 3365 }, { "epoch": 0.18, "learning_rate": 4.955601525046385e-05, "loss": 0.9055, "step": 3370 }, { "epoch": 0.18, "learning_rate": 4.955470034848355e-05, "loss": 0.6796, "step": 3375 }, { "epoch": 0.18, "learning_rate": 4.955338351977559e-05, "loss": 0.8158, "step": 3380 }, { "epoch": 0.18, "learning_rate": 4.9552064764443275e-05, "loss": 0.8094, "step": 3385 }, { "epoch": 0.18, "learning_rate": 4.955074408259008e-05, "loss": 0.8934, "step": 3390 }, { "epoch": 0.18, "learning_rate": 4.9549421474319654e-05, "loss": 0.8261, "step": 3395 }, { "epoch": 0.18, "learning_rate": 4.954809693973578e-05, "loss": 0.9384, "step": 3400 }, { "epoch": 0.18, "learning_rate": 4.954677047894236e-05, "loss": 0.9363, "step": 3405 }, { "epoch": 0.18, "learning_rate": 4.95454420920435e-05, "loss": 0.9239, "step": 3410 }, { "epoch": 0.18, "learning_rate": 4.9544111779143435e-05, "loss": 0.9831, "step": 3415 }, { "epoch": 0.18, "learning_rate": 4.954277954034654e-05, "loss": 0.7436, "step": 3420 }, { "epoch": 0.18, "learning_rate": 4.954144537575736e-05, "loss": 0.8792, "step": 3425 }, { "epoch": 0.18, "learning_rate": 4.954010928548057e-05, "loss": 0.8038, "step": 3430 }, { "epoch": 0.18, "learning_rate": 4.9538771269621024e-05, "loss": 0.9046, "step": 3435 }, { "epoch": 0.18, "learning_rate": 4.953743132828371e-05, "loss": 0.8244, "step": 3440 }, { "epoch": 0.18, "learning_rate": 4.953608946157376e-05, "loss": 0.9734, "step": 3445 }, { "epoch": 0.18, "learning_rate": 4.953474566959647e-05, "loss": 0.7814, "step": 3450 }, { "epoch": 0.18, "learning_rate": 4.9533399952457276e-05, "loss": 0.8054, "step": 3455 }, { "epoch": 0.19, "learning_rate": 4.953205231026179e-05, "loss": 0.7926, "step": 3460 }, { "epoch": 0.19, "learning_rate": 4.953070274311574e-05, "loss": 0.8162, "step": 3465 }, { "epoch": 0.19, "learning_rate": 4.9529351251125035e-05, "loss": 0.7372, "step": 3470 }, { "epoch": 0.19, "learning_rate": 4.9527997834395706e-05, "loss": 0.8506, "step": 3475 }, { "epoch": 0.19, "learning_rate": 4.952664249303397e-05, "loss": 1.0776, "step": 3480 }, { "epoch": 0.19, "learning_rate": 4.952528522714616e-05, "loss": 0.8315, "step": 3485 }, { "epoch": 0.19, "learning_rate": 4.9523926036838784e-05, "loss": 0.9055, "step": 3490 }, { "epoch": 0.19, "learning_rate": 4.952256492221849e-05, "loss": 0.7596, "step": 3495 }, { "epoch": 0.19, "learning_rate": 4.952120188339209e-05, "loss": 0.7789, "step": 3500 }, { "epoch": 0.19, "learning_rate": 4.9519836920466535e-05, "loss": 0.8056, "step": 3505 }, { "epoch": 0.19, "learning_rate": 4.951847003354892e-05, "loss": 0.7515, "step": 3510 }, { "epoch": 0.19, "learning_rate": 4.95171012227465e-05, "loss": 0.8561, "step": 3515 }, { "epoch": 0.19, "learning_rate": 4.951573048816668e-05, "loss": 0.8129, "step": 3520 }, { "epoch": 0.19, "learning_rate": 4.951435782991704e-05, "loss": 0.716, "step": 3525 }, { "epoch": 0.19, "learning_rate": 4.951298324810525e-05, "loss": 0.8735, "step": 3530 }, { "epoch": 0.19, "learning_rate": 4.95116067428392e-05, "loss": 0.8052, "step": 3535 }, { "epoch": 0.19, "learning_rate": 4.951022831422689e-05, "loss": 0.8611, "step": 3540 }, { "epoch": 0.19, "learning_rate": 4.9508847962376484e-05, "loss": 0.8507, "step": 3545 }, { "epoch": 0.19, "learning_rate": 4.9507465687396284e-05, "loss": 0.8489, "step": 3550 }, { "epoch": 0.19, "learning_rate": 4.9506081489394764e-05, "loss": 0.8963, "step": 3555 }, { "epoch": 0.19, "learning_rate": 4.950469536848053e-05, "loss": 0.8285, "step": 3560 }, { "epoch": 0.19, "learning_rate": 4.950330732476235e-05, "loss": 0.6962, "step": 3565 }, { "epoch": 0.19, "learning_rate": 4.9501917358349134e-05, "loss": 0.8291, "step": 3570 }, { "epoch": 0.19, "learning_rate": 4.9500525469349955e-05, "loss": 0.9397, "step": 3575 }, { "epoch": 0.19, "learning_rate": 4.949913165787403e-05, "loss": 0.8595, "step": 3580 }, { "epoch": 0.19, "learning_rate": 4.949773592403072e-05, "loss": 0.8645, "step": 3585 }, { "epoch": 0.19, "learning_rate": 4.949633826792955e-05, "loss": 0.6876, "step": 3590 }, { "epoch": 0.19, "learning_rate": 4.949493868968019e-05, "loss": 0.6638, "step": 3595 }, { "epoch": 0.19, "learning_rate": 4.9493537189392445e-05, "loss": 0.9154, "step": 3600 }, { "epoch": 0.19, "learning_rate": 4.949213376717631e-05, "loss": 0.6988, "step": 3605 }, { "epoch": 0.19, "learning_rate": 4.9490728423141896e-05, "loss": 0.716, "step": 3610 }, { "epoch": 0.19, "learning_rate": 4.9489321157399475e-05, "loss": 0.7978, "step": 3615 }, { "epoch": 0.19, "learning_rate": 4.948791197005947e-05, "loss": 0.866, "step": 3620 }, { "epoch": 0.19, "learning_rate": 4.948650086123245e-05, "loss": 0.9086, "step": 3625 }, { "epoch": 0.19, "learning_rate": 4.948508783102916e-05, "loss": 0.894, "step": 3630 }, { "epoch": 0.19, "learning_rate": 4.9483672879560454e-05, "loss": 0.8542, "step": 3635 }, { "epoch": 0.19, "learning_rate": 4.948225600693737e-05, "loss": 0.8554, "step": 3640 }, { "epoch": 0.19, "learning_rate": 4.948083721327108e-05, "loss": 0.8713, "step": 3645 }, { "epoch": 0.2, "learning_rate": 4.947941649867292e-05, "loss": 0.897, "step": 3650 }, { "epoch": 0.2, "learning_rate": 4.947799386325436e-05, "loss": 0.8087, "step": 3655 }, { "epoch": 0.2, "learning_rate": 4.947656930712703e-05, "loss": 0.8883, "step": 3660 }, { "epoch": 0.2, "learning_rate": 4.947514283040272e-05, "loss": 0.8465, "step": 3665 }, { "epoch": 0.2, "learning_rate": 4.947371443319335e-05, "loss": 0.8383, "step": 3670 }, { "epoch": 0.2, "learning_rate": 4.9472284115611005e-05, "loss": 0.8494, "step": 3675 }, { "epoch": 0.2, "learning_rate": 4.947085187776792e-05, "loss": 0.7423, "step": 3680 }, { "epoch": 0.2, "learning_rate": 4.9469417719776477e-05, "loss": 0.7621, "step": 3685 }, { "epoch": 0.2, "learning_rate": 4.9467981641749216e-05, "loss": 0.7575, "step": 3690 }, { "epoch": 0.2, "learning_rate": 4.9466543643798805e-05, "loss": 0.8, "step": 3695 }, { "epoch": 0.2, "learning_rate": 4.9465103726038096e-05, "loss": 0.7279, "step": 3700 }, { "epoch": 0.2, "learning_rate": 4.946366188858006e-05, "loss": 0.9188, "step": 3705 }, { "epoch": 0.2, "learning_rate": 4.9462218131537845e-05, "loss": 0.901, "step": 3710 }, { "epoch": 0.2, "learning_rate": 4.946077245502474e-05, "loss": 0.8387, "step": 3715 }, { "epoch": 0.2, "learning_rate": 4.9459324859154167e-05, "loss": 0.7089, "step": 3720 }, { "epoch": 0.2, "learning_rate": 4.945787534403973e-05, "loss": 0.8077, "step": 3725 }, { "epoch": 0.2, "learning_rate": 4.945642390979516e-05, "loss": 0.8124, "step": 3730 }, { "epoch": 0.2, "learning_rate": 4.945497055653434e-05, "loss": 0.8187, "step": 3735 }, { "epoch": 0.2, "learning_rate": 4.945351528437132e-05, "loss": 0.8432, "step": 3740 }, { "epoch": 0.2, "learning_rate": 4.945205809342029e-05, "loss": 1.0281, "step": 3745 }, { "epoch": 0.2, "learning_rate": 4.945059898379559e-05, "loss": 0.946, "step": 3750 }, { "epoch": 0.2, "learning_rate": 4.944913795561171e-05, "loss": 0.9212, "step": 3755 }, { "epoch": 0.2, "learning_rate": 4.9447675008983295e-05, "loss": 0.9404, "step": 3760 }, { "epoch": 0.2, "learning_rate": 4.9446210144025134e-05, "loss": 0.8842, "step": 3765 }, { "epoch": 0.2, "learning_rate": 4.944474336085217e-05, "loss": 0.7175, "step": 3770 }, { "epoch": 0.2, "learning_rate": 4.94432746595795e-05, "loss": 0.8221, "step": 3775 }, { "epoch": 0.2, "learning_rate": 4.944180404032236e-05, "loss": 0.6941, "step": 3780 }, { "epoch": 0.2, "learning_rate": 4.9440331503196165e-05, "loss": 0.9288, "step": 3785 }, { "epoch": 0.2, "learning_rate": 4.9438857048316434e-05, "loss": 0.7445, "step": 3790 }, { "epoch": 0.2, "learning_rate": 4.943738067579888e-05, "loss": 0.8878, "step": 3795 }, { "epoch": 0.2, "learning_rate": 4.943590238575935e-05, "loss": 0.8777, "step": 3800 }, { "epoch": 0.2, "learning_rate": 4.943442217831382e-05, "loss": 0.9116, "step": 3805 }, { "epoch": 0.2, "learning_rate": 4.943294005357846e-05, "loss": 0.9225, "step": 3810 }, { "epoch": 0.2, "learning_rate": 4.943145601166956e-05, "loss": 0.8456, "step": 3815 }, { "epoch": 0.2, "learning_rate": 4.942997005270357e-05, "loss": 0.7643, "step": 3820 }, { "epoch": 0.2, "learning_rate": 4.942848217679709e-05, "loss": 0.7678, "step": 3825 }, { "epoch": 0.2, "learning_rate": 4.942699238406685e-05, "loss": 0.796, "step": 3830 }, { "epoch": 0.21, "learning_rate": 4.9425500674629775e-05, "loss": 0.9026, "step": 3835 }, { "epoch": 0.21, "learning_rate": 4.9424007048602905e-05, "loss": 0.8451, "step": 3840 }, { "epoch": 0.21, "learning_rate": 4.942251150610343e-05, "loss": 0.8219, "step": 3845 }, { "epoch": 0.21, "learning_rate": 4.9421014047248716e-05, "loss": 0.758, "step": 3850 }, { "epoch": 0.21, "learning_rate": 4.9419514672156255e-05, "loss": 0.8383, "step": 3855 }, { "epoch": 0.21, "learning_rate": 4.941801338094369e-05, "loss": 0.858, "step": 3860 }, { "epoch": 0.21, "learning_rate": 4.941651017372885e-05, "loss": 0.8233, "step": 3865 }, { "epoch": 0.21, "learning_rate": 4.9415005050629654e-05, "loss": 0.9621, "step": 3870 }, { "epoch": 0.21, "learning_rate": 4.941349801176422e-05, "loss": 0.9742, "step": 3875 }, { "epoch": 0.21, "learning_rate": 4.9411989057250805e-05, "loss": 0.8342, "step": 3880 }, { "epoch": 0.21, "learning_rate": 4.941047818720781e-05, "loss": 0.7426, "step": 3885 }, { "epoch": 0.21, "learning_rate": 4.9408965401753776e-05, "loss": 0.857, "step": 3890 }, { "epoch": 0.21, "learning_rate": 4.9407450701007416e-05, "loss": 0.8559, "step": 3895 }, { "epoch": 0.21, "learning_rate": 4.9405934085087584e-05, "loss": 0.7515, "step": 3900 }, { "epoch": 0.21, "learning_rate": 4.940441555411328e-05, "loss": 0.8281, "step": 3905 }, { "epoch": 0.21, "learning_rate": 4.940289510820367e-05, "loss": 0.9546, "step": 3910 }, { "epoch": 0.21, "learning_rate": 4.9401372747478035e-05, "loss": 0.8277, "step": 3915 }, { "epoch": 0.21, "learning_rate": 4.939984847205585e-05, "loss": 0.9051, "step": 3920 }, { "epoch": 0.21, "learning_rate": 4.939832228205672e-05, "loss": 0.9353, "step": 3925 }, { "epoch": 0.21, "learning_rate": 4.9396794177600384e-05, "loss": 0.928, "step": 3930 }, { "epoch": 0.21, "learning_rate": 4.9395264158806764e-05, "loss": 0.7046, "step": 3935 }, { "epoch": 0.21, "learning_rate": 4.9393732225795906e-05, "loss": 0.7513, "step": 3940 }, { "epoch": 0.21, "learning_rate": 4.939219837868801e-05, "loss": 0.7943, "step": 3945 }, { "epoch": 0.21, "learning_rate": 4.939066261760346e-05, "loss": 0.7632, "step": 3950 }, { "epoch": 0.21, "learning_rate": 4.9389124942662727e-05, "loss": 0.7753, "step": 3955 }, { "epoch": 0.21, "learning_rate": 4.938758535398649e-05, "loss": 0.8438, "step": 3960 }, { "epoch": 0.21, "learning_rate": 4.938604385169555e-05, "loss": 0.8776, "step": 3965 }, { "epoch": 0.21, "learning_rate": 4.938450043591085e-05, "loss": 0.6746, "step": 3970 }, { "epoch": 0.21, "learning_rate": 4.938295510675352e-05, "loss": 0.7182, "step": 3975 }, { "epoch": 0.21, "learning_rate": 4.938140786434481e-05, "loss": 0.9462, "step": 3980 }, { "epoch": 0.21, "learning_rate": 4.937985870880612e-05, "loss": 0.8194, "step": 3985 }, { "epoch": 0.21, "learning_rate": 4.9378307640259005e-05, "loss": 1.0525, "step": 3990 }, { "epoch": 0.21, "learning_rate": 4.937675465882517e-05, "loss": 0.8388, "step": 3995 }, { "epoch": 0.21, "learning_rate": 4.93751997646265e-05, "loss": 0.8251, "step": 4000 }, { "epoch": 0.21, "learning_rate": 4.937364295778497e-05, "loss": 0.7671, "step": 4005 }, { "epoch": 0.21, "learning_rate": 4.937208423842276e-05, "loss": 1.005, "step": 4010 }, { "epoch": 0.21, "learning_rate": 4.9370523606662155e-05, "loss": 0.9077, "step": 4015 }, { "epoch": 0.22, "learning_rate": 4.9368961062625626e-05, "loss": 0.943, "step": 4020 }, { "epoch": 0.22, "learning_rate": 4.9367396606435787e-05, "loss": 0.7335, "step": 4025 }, { "epoch": 0.22, "learning_rate": 4.9365830238215395e-05, "loss": 0.8139, "step": 4030 }, { "epoch": 0.22, "learning_rate": 4.9364261958087346e-05, "loss": 0.8767, "step": 4035 }, { "epoch": 0.22, "learning_rate": 4.9362691766174704e-05, "loss": 0.8758, "step": 4040 }, { "epoch": 0.22, "learning_rate": 4.9361119662600674e-05, "loss": 0.8697, "step": 4045 }, { "epoch": 0.22, "learning_rate": 4.9359545647488624e-05, "loss": 0.8138, "step": 4050 }, { "epoch": 0.22, "learning_rate": 4.935796972096205e-05, "loss": 0.8757, "step": 4055 }, { "epoch": 0.22, "learning_rate": 4.935639188314461e-05, "loss": 0.7957, "step": 4060 }, { "epoch": 0.22, "learning_rate": 4.935481213416012e-05, "loss": 0.8402, "step": 4065 }, { "epoch": 0.22, "learning_rate": 4.9353230474132536e-05, "loss": 0.8074, "step": 4070 }, { "epoch": 0.22, "learning_rate": 4.935164690318596e-05, "loss": 0.6974, "step": 4075 }, { "epoch": 0.22, "learning_rate": 4.935006142144465e-05, "loss": 0.8779, "step": 4080 }, { "epoch": 0.22, "learning_rate": 4.934847402903303e-05, "loss": 0.702, "step": 4085 }, { "epoch": 0.22, "learning_rate": 4.934688472607563e-05, "loss": 0.8912, "step": 4090 }, { "epoch": 0.22, "learning_rate": 4.9345293512697175e-05, "loss": 0.8372, "step": 4095 }, { "epoch": 0.22, "learning_rate": 4.934370038902253e-05, "loss": 0.7317, "step": 4100 }, { "epoch": 0.22, "learning_rate": 4.934210535517668e-05, "loss": 0.901, "step": 4105 }, { "epoch": 0.22, "learning_rate": 4.9340508411284804e-05, "loss": 0.7522, "step": 4110 }, { "epoch": 0.22, "learning_rate": 4.933890955747219e-05, "loss": 0.7643, "step": 4115 }, { "epoch": 0.22, "learning_rate": 4.93373087938643e-05, "loss": 0.866, "step": 4120 }, { "epoch": 0.22, "learning_rate": 4.9335706120586754e-05, "loss": 0.8287, "step": 4125 }, { "epoch": 0.22, "learning_rate": 4.9334101537765296e-05, "loss": 0.769, "step": 4130 }, { "epoch": 0.22, "learning_rate": 4.933249504552583e-05, "loss": 0.9201, "step": 4135 }, { "epoch": 0.22, "learning_rate": 4.933088664399442e-05, "loss": 0.6929, "step": 4140 }, { "epoch": 0.22, "learning_rate": 4.932927633329727e-05, "loss": 0.7547, "step": 4145 }, { "epoch": 0.22, "learning_rate": 4.932766411356074e-05, "loss": 0.7361, "step": 4150 }, { "epoch": 0.22, "learning_rate": 4.9326049984911326e-05, "loss": 0.7689, "step": 4155 }, { "epoch": 0.22, "learning_rate": 4.932443394747569e-05, "loss": 0.7908, "step": 4160 }, { "epoch": 0.22, "learning_rate": 4.9322816001380635e-05, "loss": 0.9189, "step": 4165 }, { "epoch": 0.22, "learning_rate": 4.9321196146753114e-05, "loss": 0.8301, "step": 4170 }, { "epoch": 0.22, "learning_rate": 4.931957438372024e-05, "loss": 0.8996, "step": 4175 }, { "epoch": 0.22, "learning_rate": 4.931795071240926e-05, "loss": 0.764, "step": 4180 }, { "epoch": 0.22, "learning_rate": 4.931632513294758e-05, "loss": 0.803, "step": 4185 }, { "epoch": 0.22, "learning_rate": 4.931469764546275e-05, "loss": 0.9746, "step": 4190 }, { "epoch": 0.22, "learning_rate": 4.9313068250082486e-05, "loss": 0.8741, "step": 4195 }, { "epoch": 0.22, "learning_rate": 4.9311436946934624e-05, "loss": 0.9009, "step": 4200 }, { "epoch": 0.22, "learning_rate": 4.930980373614718e-05, "loss": 0.7202, "step": 4205 }, { "epoch": 0.23, "learning_rate": 4.9308168617848304e-05, "loss": 0.8097, "step": 4210 }, { "epoch": 0.23, "learning_rate": 4.9306531592166294e-05, "loss": 0.6738, "step": 4215 }, { "epoch": 0.23, "learning_rate": 4.930489265922961e-05, "loss": 0.7737, "step": 4220 }, { "epoch": 0.23, "learning_rate": 4.930325181916684e-05, "loss": 0.8228, "step": 4225 }, { "epoch": 0.23, "learning_rate": 4.9301609072106756e-05, "loss": 0.8239, "step": 4230 }, { "epoch": 0.23, "learning_rate": 4.929996441817825e-05, "loss": 0.9161, "step": 4235 }, { "epoch": 0.23, "learning_rate": 4.9298317857510354e-05, "loss": 0.9567, "step": 4240 }, { "epoch": 0.23, "learning_rate": 4.9296669390232294e-05, "loss": 0.8271, "step": 4245 }, { "epoch": 0.23, "learning_rate": 4.929501901647341e-05, "loss": 0.6943, "step": 4250 }, { "epoch": 0.23, "learning_rate": 4.9293366736363205e-05, "loss": 0.6149, "step": 4255 }, { "epoch": 0.23, "learning_rate": 4.929171255003132e-05, "loss": 0.7123, "step": 4260 }, { "epoch": 0.23, "learning_rate": 4.929005645760756e-05, "loss": 0.8766, "step": 4265 }, { "epoch": 0.23, "learning_rate": 4.9288398459221875e-05, "loss": 0.9577, "step": 4270 }, { "epoch": 0.23, "learning_rate": 4.928673855500435e-05, "loss": 0.7385, "step": 4275 }, { "epoch": 0.23, "learning_rate": 4.9285076745085256e-05, "loss": 0.8711, "step": 4280 }, { "epoch": 0.23, "learning_rate": 4.9283413029594964e-05, "loss": 0.8776, "step": 4285 }, { "epoch": 0.23, "learning_rate": 4.928174740866404e-05, "loss": 0.8267, "step": 4290 }, { "epoch": 0.23, "learning_rate": 4.928007988242317e-05, "loss": 0.7509, "step": 4295 }, { "epoch": 0.23, "learning_rate": 4.92784104510032e-05, "loss": 1.0717, "step": 4300 }, { "epoch": 0.23, "learning_rate": 4.927673911453512e-05, "loss": 0.7941, "step": 4305 }, { "epoch": 0.23, "learning_rate": 4.9275065873150086e-05, "loss": 0.8065, "step": 4310 }, { "epoch": 0.23, "learning_rate": 4.9273390726979384e-05, "loss": 0.7789, "step": 4315 }, { "epoch": 0.23, "learning_rate": 4.9271713676154465e-05, "loss": 0.8482, "step": 4320 }, { "epoch": 0.23, "learning_rate": 4.927003472080691e-05, "loss": 0.9035, "step": 4325 }, { "epoch": 0.23, "learning_rate": 4.9268353861068475e-05, "loss": 0.8393, "step": 4330 }, { "epoch": 0.23, "learning_rate": 4.926667109707104e-05, "loss": 1.0194, "step": 4335 }, { "epoch": 0.23, "learning_rate": 4.9264986428946644e-05, "loss": 0.8048, "step": 4340 }, { "epoch": 0.23, "learning_rate": 4.926329985682749e-05, "loss": 0.8594, "step": 4345 }, { "epoch": 0.23, "learning_rate": 4.9261611380845915e-05, "loss": 0.8202, "step": 4350 }, { "epoch": 0.23, "learning_rate": 4.9259921001134404e-05, "loss": 0.8064, "step": 4355 }, { "epoch": 0.23, "learning_rate": 4.925822871782559e-05, "loss": 0.804, "step": 4360 }, { "epoch": 0.23, "learning_rate": 4.9256534531052265e-05, "loss": 0.9993, "step": 4365 }, { "epoch": 0.23, "learning_rate": 4.9254838440947374e-05, "loss": 0.904, "step": 4370 }, { "epoch": 0.23, "learning_rate": 4.9253140447644e-05, "loss": 0.8427, "step": 4375 }, { "epoch": 0.23, "learning_rate": 4.925144055127537e-05, "loss": 0.8224, "step": 4380 }, { "epoch": 0.23, "learning_rate": 4.9249738751974886e-05, "loss": 0.9162, "step": 4385 }, { "epoch": 0.23, "learning_rate": 4.9248035049876074e-05, "loss": 0.8402, "step": 4390 }, { "epoch": 0.24, "learning_rate": 4.9246329445112604e-05, "loss": 0.7632, "step": 4395 }, { "epoch": 0.24, "learning_rate": 4.9244621937818335e-05, "loss": 0.73, "step": 4400 }, { "epoch": 0.24, "learning_rate": 4.924291252812723e-05, "loss": 0.747, "step": 4405 }, { "epoch": 0.24, "learning_rate": 4.9241201216173435e-05, "loss": 0.7069, "step": 4410 }, { "epoch": 0.24, "learning_rate": 4.923948800209122e-05, "loss": 0.811, "step": 4415 }, { "epoch": 0.24, "learning_rate": 4.923777288601502e-05, "loss": 0.8599, "step": 4420 }, { "epoch": 0.24, "learning_rate": 4.923605586807941e-05, "loss": 0.8569, "step": 4425 }, { "epoch": 0.24, "learning_rate": 4.923433694841913e-05, "loss": 0.8376, "step": 4430 }, { "epoch": 0.24, "learning_rate": 4.9232616127169045e-05, "loss": 0.6783, "step": 4435 }, { "epoch": 0.24, "learning_rate": 4.923089340446419e-05, "loss": 0.7608, "step": 4440 }, { "epoch": 0.24, "learning_rate": 4.922916878043974e-05, "loss": 0.9222, "step": 4445 }, { "epoch": 0.24, "learning_rate": 4.922744225523102e-05, "loss": 0.8178, "step": 4450 }, { "epoch": 0.24, "learning_rate": 4.922571382897351e-05, "loss": 0.7488, "step": 4455 }, { "epoch": 0.24, "learning_rate": 4.922398350180282e-05, "loss": 0.9123, "step": 4460 }, { "epoch": 0.24, "learning_rate": 4.9222251273854734e-05, "loss": 0.7817, "step": 4465 }, { "epoch": 0.24, "learning_rate": 4.9220517145265175e-05, "loss": 0.8018, "step": 4470 }, { "epoch": 0.24, "learning_rate": 4.9218781116170215e-05, "loss": 0.6371, "step": 4475 }, { "epoch": 0.24, "learning_rate": 4.921704318670607e-05, "loss": 0.8798, "step": 4480 }, { "epoch": 0.24, "learning_rate": 4.921530335700911e-05, "loss": 0.789, "step": 4485 }, { "epoch": 0.24, "learning_rate": 4.9213561627215855e-05, "loss": 0.8795, "step": 4490 }, { "epoch": 0.24, "learning_rate": 4.921181799746297e-05, "loss": 0.7122, "step": 4495 }, { "epoch": 0.24, "learning_rate": 4.921007246788728e-05, "loss": 0.7823, "step": 4500 }, { "epoch": 0.24, "learning_rate": 4.920832503862575e-05, "loss": 0.8281, "step": 4505 }, { "epoch": 0.24, "learning_rate": 4.920657570981548e-05, "loss": 0.8589, "step": 4510 }, { "epoch": 0.24, "learning_rate": 4.920482448159375e-05, "loss": 0.916, "step": 4515 }, { "epoch": 0.24, "learning_rate": 4.920307135409797e-05, "loss": 0.762, "step": 4520 }, { "epoch": 0.24, "learning_rate": 4.92013163274657e-05, "loss": 0.7822, "step": 4525 }, { "epoch": 0.24, "learning_rate": 4.919955940183465e-05, "loss": 0.8692, "step": 4530 }, { "epoch": 0.24, "learning_rate": 4.9197800577342677e-05, "loss": 0.886, "step": 4535 }, { "epoch": 0.24, "learning_rate": 4.9196039854127805e-05, "loss": 0.8793, "step": 4540 }, { "epoch": 0.24, "learning_rate": 4.9194277232328176e-05, "loss": 0.7658, "step": 4545 }, { "epoch": 0.24, "learning_rate": 4.919251271208211e-05, "loss": 0.7981, "step": 4550 }, { "epoch": 0.24, "learning_rate": 4.9190746293528056e-05, "loss": 0.8411, "step": 4555 }, { "epoch": 0.24, "learning_rate": 4.918897797680462e-05, "loss": 0.9487, "step": 4560 }, { "epoch": 0.24, "learning_rate": 4.918720776205055e-05, "loss": 0.7555, "step": 4565 }, { "epoch": 0.24, "learning_rate": 4.9185435649404766e-05, "loss": 0.7755, "step": 4570 }, { "epoch": 0.24, "learning_rate": 4.91836616390063e-05, "loss": 0.914, "step": 4575 }, { "epoch": 0.25, "learning_rate": 4.9181885730994356e-05, "loss": 0.7696, "step": 4580 }, { "epoch": 0.25, "learning_rate": 4.9180107925508306e-05, "loss": 0.8708, "step": 4585 }, { "epoch": 0.25, "learning_rate": 4.9178328222687625e-05, "loss": 0.7438, "step": 4590 }, { "epoch": 0.25, "learning_rate": 4.917654662267197e-05, "loss": 0.8309, "step": 4595 }, { "epoch": 0.25, "learning_rate": 4.917476312560113e-05, "loss": 0.7166, "step": 4600 }, { "epoch": 0.25, "learning_rate": 4.917297773161506e-05, "loss": 0.8975, "step": 4605 }, { "epoch": 0.25, "learning_rate": 4.917119044085385e-05, "loss": 0.7804, "step": 4610 }, { "epoch": 0.25, "learning_rate": 4.9169401253457737e-05, "loss": 0.97, "step": 4615 }, { "epoch": 0.25, "learning_rate": 4.9167610169567127e-05, "loss": 0.9211, "step": 4620 }, { "epoch": 0.25, "learning_rate": 4.9165817189322546e-05, "loss": 0.8738, "step": 4625 }, { "epoch": 0.25, "learning_rate": 4.916402231286469e-05, "loss": 0.8577, "step": 4630 }, { "epoch": 0.25, "learning_rate": 4.9162225540334396e-05, "loss": 0.8111, "step": 4635 }, { "epoch": 0.25, "learning_rate": 4.9160426871872645e-05, "loss": 0.7975, "step": 4640 }, { "epoch": 0.25, "learning_rate": 4.9158626307620586e-05, "loss": 0.7764, "step": 4645 }, { "epoch": 0.25, "learning_rate": 4.91568238477195e-05, "loss": 0.9106, "step": 4650 }, { "epoch": 0.25, "learning_rate": 4.915501949231081e-05, "loss": 0.8469, "step": 4655 }, { "epoch": 0.25, "learning_rate": 4.91532132415361e-05, "loss": 0.9166, "step": 4660 }, { "epoch": 0.25, "learning_rate": 4.9151405095537116e-05, "loss": 0.7444, "step": 4665 }, { "epoch": 0.25, "learning_rate": 4.914959505445572e-05, "loss": 0.8629, "step": 4670 }, { "epoch": 0.25, "learning_rate": 4.9147783118433947e-05, "loss": 0.8151, "step": 4675 }, { "epoch": 0.25, "learning_rate": 4.9145969287613976e-05, "loss": 0.597, "step": 4680 }, { "epoch": 0.25, "learning_rate": 4.9144153562138126e-05, "loss": 0.7783, "step": 4685 }, { "epoch": 0.25, "learning_rate": 4.9142335942148873e-05, "loss": 0.7502, "step": 4690 }, { "epoch": 0.25, "learning_rate": 4.9140516427788845e-05, "loss": 0.7419, "step": 4695 }, { "epoch": 0.25, "learning_rate": 4.913869501920081e-05, "loss": 0.8133, "step": 4700 }, { "epoch": 0.25, "learning_rate": 4.913687171652769e-05, "loss": 0.7746, "step": 4705 }, { "epoch": 0.25, "learning_rate": 4.9135046519912545e-05, "loss": 0.7946, "step": 4710 }, { "epoch": 0.25, "learning_rate": 4.91332194294986e-05, "loss": 0.8155, "step": 4715 }, { "epoch": 0.25, "learning_rate": 4.9131390445429224e-05, "loss": 0.8734, "step": 4720 }, { "epoch": 0.25, "learning_rate": 4.9129559567847926e-05, "loss": 0.732, "step": 4725 }, { "epoch": 0.25, "learning_rate": 4.9127726796898367e-05, "loss": 0.8291, "step": 4730 }, { "epoch": 0.25, "learning_rate": 4.912589213272436e-05, "loss": 0.6404, "step": 4735 }, { "epoch": 0.25, "learning_rate": 4.912405557546987e-05, "loss": 0.8591, "step": 4740 }, { "epoch": 0.25, "learning_rate": 4.9122217125279e-05, "loss": 0.6889, "step": 4745 }, { "epoch": 0.25, "learning_rate": 4.912037678229602e-05, "loss": 0.9284, "step": 4750 }, { "epoch": 0.25, "learning_rate": 4.9118534546665314e-05, "loss": 0.7802, "step": 4755 }, { "epoch": 0.25, "learning_rate": 4.911669041853145e-05, "loss": 0.9108, "step": 4760 }, { "epoch": 0.25, "learning_rate": 4.9114844398039125e-05, "loss": 0.7587, "step": 4765 }, { "epoch": 0.26, "learning_rate": 4.91129964853332e-05, "loss": 0.9941, "step": 4770 }, { "epoch": 0.26, "learning_rate": 4.911114668055867e-05, "loss": 0.884, "step": 4775 }, { "epoch": 0.26, "learning_rate": 4.9109294983860675e-05, "loss": 0.8066, "step": 4780 }, { "epoch": 0.26, "learning_rate": 4.9107441395384526e-05, "loss": 0.8286, "step": 4785 }, { "epoch": 0.26, "learning_rate": 4.910558591527565e-05, "loss": 0.7533, "step": 4790 }, { "epoch": 0.26, "learning_rate": 4.910372854367966e-05, "loss": 0.9376, "step": 4795 }, { "epoch": 0.26, "learning_rate": 4.910186928074229e-05, "loss": 0.8105, "step": 4800 }, { "epoch": 0.26, "learning_rate": 4.910000812660942e-05, "loss": 0.679, "step": 4805 }, { "epoch": 0.26, "learning_rate": 4.90981450814271e-05, "loss": 0.7108, "step": 4810 }, { "epoch": 0.26, "learning_rate": 4.9096280145341514e-05, "loss": 0.8678, "step": 4815 }, { "epoch": 0.26, "learning_rate": 4.9094413318499e-05, "loss": 0.9949, "step": 4820 }, { "epoch": 0.26, "learning_rate": 4.909254460104604e-05, "loss": 0.8043, "step": 4825 }, { "epoch": 0.26, "learning_rate": 4.9090673993129265e-05, "loss": 0.7961, "step": 4830 }, { "epoch": 0.26, "learning_rate": 4.9088801494895465e-05, "loss": 0.7947, "step": 4835 }, { "epoch": 0.26, "learning_rate": 4.908692710649156e-05, "loss": 0.7128, "step": 4840 }, { "epoch": 0.26, "learning_rate": 4.908505082806462e-05, "loss": 0.6772, "step": 4845 }, { "epoch": 0.26, "learning_rate": 4.908317265976188e-05, "loss": 0.5842, "step": 4850 }, { "epoch": 0.26, "learning_rate": 4.9081292601730714e-05, "loss": 0.8447, "step": 4855 }, { "epoch": 0.26, "learning_rate": 4.907941065411864e-05, "loss": 0.8872, "step": 4860 }, { "epoch": 0.26, "learning_rate": 4.907752681707333e-05, "loss": 0.6919, "step": 4865 }, { "epoch": 0.26, "learning_rate": 4.9075641090742606e-05, "loss": 0.7229, "step": 4870 }, { "epoch": 0.26, "learning_rate": 4.907375347527443e-05, "loss": 0.9401, "step": 4875 }, { "epoch": 0.26, "learning_rate": 4.9071863970816925e-05, "loss": 0.8628, "step": 4880 }, { "epoch": 0.26, "learning_rate": 4.906997257751834e-05, "loss": 0.8033, "step": 4885 }, { "epoch": 0.26, "learning_rate": 4.90680792955271e-05, "loss": 0.844, "step": 4890 }, { "epoch": 0.26, "learning_rate": 4.906618412499176e-05, "loss": 0.9655, "step": 4895 }, { "epoch": 0.26, "learning_rate": 4.906428706606102e-05, "loss": 0.9286, "step": 4900 }, { "epoch": 0.26, "learning_rate": 4.906238811888375e-05, "loss": 0.8596, "step": 4905 }, { "epoch": 0.26, "learning_rate": 4.9060487283608945e-05, "loss": 0.7534, "step": 4910 }, { "epoch": 0.26, "learning_rate": 4.905858456038577e-05, "loss": 0.7926, "step": 4915 }, { "epoch": 0.26, "learning_rate": 4.90566799493635e-05, "loss": 0.7473, "step": 4920 }, { "epoch": 0.26, "learning_rate": 4.905477345069161e-05, "loss": 0.8419, "step": 4925 }, { "epoch": 0.26, "learning_rate": 4.905286506451968e-05, "loss": 0.7639, "step": 4930 }, { "epoch": 0.26, "learning_rate": 4.905095479099747e-05, "loss": 0.8235, "step": 4935 }, { "epoch": 0.26, "learning_rate": 4.904904263027486e-05, "loss": 0.7668, "step": 4940 }, { "epoch": 0.26, "learning_rate": 4.904712858250189e-05, "loss": 0.7284, "step": 4945 }, { "epoch": 0.26, "learning_rate": 4.904521264782875e-05, "loss": 0.7893, "step": 4950 }, { "epoch": 0.27, "learning_rate": 4.904329482640579e-05, "loss": 0.8401, "step": 4955 }, { "epoch": 0.27, "learning_rate": 4.904137511838348e-05, "loss": 0.96, "step": 4960 }, { "epoch": 0.27, "learning_rate": 4.903945352391247e-05, "loss": 0.8928, "step": 4965 }, { "epoch": 0.27, "learning_rate": 4.903753004314352e-05, "loss": 0.8379, "step": 4970 }, { "epoch": 0.27, "learning_rate": 4.9035604676227574e-05, "loss": 0.8085, "step": 4975 }, { "epoch": 0.27, "learning_rate": 4.903367742331571e-05, "loss": 0.7874, "step": 4980 }, { "epoch": 0.27, "learning_rate": 4.9031748284559145e-05, "loss": 0.6945, "step": 4985 }, { "epoch": 0.27, "learning_rate": 4.902981726010926e-05, "loss": 0.9188, "step": 4990 }, { "epoch": 0.27, "learning_rate": 4.902788435011757e-05, "loss": 0.8353, "step": 4995 }, { "epoch": 0.27, "learning_rate": 4.9025949554735736e-05, "loss": 0.8942, "step": 5000 }, { "epoch": 0.27, "learning_rate": 4.90240128741156e-05, "loss": 0.6747, "step": 5005 }, { "epoch": 0.27, "learning_rate": 4.90220743084091e-05, "loss": 0.6895, "step": 5010 }, { "epoch": 0.27, "learning_rate": 4.902013385776838e-05, "loss": 0.9258, "step": 5015 }, { "epoch": 0.27, "learning_rate": 4.901819152234567e-05, "loss": 0.678, "step": 5020 }, { "epoch": 0.27, "learning_rate": 4.9016247302293396e-05, "loss": 0.8835, "step": 5025 }, { "epoch": 0.27, "learning_rate": 4.90143011977641e-05, "loss": 0.934, "step": 5030 }, { "epoch": 0.27, "learning_rate": 4.901235320891051e-05, "loss": 0.7437, "step": 5035 }, { "epoch": 0.27, "learning_rate": 4.901040333588545e-05, "loss": 0.9084, "step": 5040 }, { "epoch": 0.27, "learning_rate": 4.900845157884195e-05, "loss": 1.0283, "step": 5045 }, { "epoch": 0.27, "learning_rate": 4.900649793793313e-05, "loss": 1.0257, "step": 5050 }, { "epoch": 0.27, "learning_rate": 4.9004542413312305e-05, "loss": 0.8054, "step": 5055 }, { "epoch": 0.27, "learning_rate": 4.9002585005132914e-05, "loss": 0.7625, "step": 5060 }, { "epoch": 0.27, "learning_rate": 4.9000625713548545e-05, "loss": 0.7329, "step": 5065 }, { "epoch": 0.27, "learning_rate": 4.899866453871294e-05, "loss": 0.7228, "step": 5070 }, { "epoch": 0.27, "learning_rate": 4.899670148077998e-05, "loss": 0.8376, "step": 5075 }, { "epoch": 0.27, "learning_rate": 4.899473653990371e-05, "loss": 0.9449, "step": 5080 }, { "epoch": 0.27, "learning_rate": 4.899276971623831e-05, "loss": 0.9834, "step": 5085 }, { "epoch": 0.27, "learning_rate": 4.8990801009938103e-05, "loss": 0.8887, "step": 5090 }, { "epoch": 0.27, "learning_rate": 4.898883042115758e-05, "loss": 0.8239, "step": 5095 }, { "epoch": 0.27, "learning_rate": 4.898685795005135e-05, "loss": 0.7858, "step": 5100 }, { "epoch": 0.27, "learning_rate": 4.89848835967742e-05, "loss": 0.9186, "step": 5105 }, { "epoch": 0.27, "learning_rate": 4.898290736148105e-05, "loss": 0.7272, "step": 5110 }, { "epoch": 0.27, "learning_rate": 4.898092924432695e-05, "loss": 0.606, "step": 5115 }, { "epoch": 0.27, "learning_rate": 4.897894924546714e-05, "loss": 0.7839, "step": 5120 }, { "epoch": 0.27, "learning_rate": 4.897696736505698e-05, "loss": 0.7314, "step": 5125 }, { "epoch": 0.27, "learning_rate": 4.8974983603251964e-05, "loss": 0.9404, "step": 5130 }, { "epoch": 0.27, "learning_rate": 4.8972997960207776e-05, "loss": 0.8616, "step": 5135 }, { "epoch": 0.27, "learning_rate": 4.897101043608021e-05, "loss": 0.7068, "step": 5140 }, { "epoch": 0.28, "learning_rate": 4.896902103102522e-05, "loss": 0.79, "step": 5145 }, { "epoch": 0.28, "learning_rate": 4.896702974519891e-05, "loss": 0.8714, "step": 5150 }, { "epoch": 0.28, "learning_rate": 4.8965036578757536e-05, "loss": 0.7654, "step": 5155 }, { "epoch": 0.28, "learning_rate": 4.896304153185748e-05, "loss": 0.8567, "step": 5160 }, { "epoch": 0.28, "learning_rate": 4.8961044604655304e-05, "loss": 0.8752, "step": 5165 }, { "epoch": 0.28, "learning_rate": 4.895904579730769e-05, "loss": 0.7139, "step": 5170 }, { "epoch": 0.28, "learning_rate": 4.8957045109971476e-05, "loss": 0.936, "step": 5175 }, { "epoch": 0.28, "learning_rate": 4.895504254280366e-05, "loss": 0.6209, "step": 5180 }, { "epoch": 0.28, "learning_rate": 4.8953038095961366e-05, "loss": 0.9938, "step": 5185 }, { "epoch": 0.28, "learning_rate": 4.895103176960188e-05, "loss": 0.6716, "step": 5190 }, { "epoch": 0.28, "learning_rate": 4.894902356388263e-05, "loss": 0.9064, "step": 5195 }, { "epoch": 0.28, "learning_rate": 4.894701347896121e-05, "loss": 0.7788, "step": 5200 }, { "epoch": 0.28, "learning_rate": 4.894500151499532e-05, "loss": 0.7092, "step": 5205 }, { "epoch": 0.28, "learning_rate": 4.894298767214285e-05, "loss": 0.8147, "step": 5210 }, { "epoch": 0.28, "learning_rate": 4.894097195056181e-05, "loss": 0.9334, "step": 5215 }, { "epoch": 0.28, "learning_rate": 4.893895435041037e-05, "loss": 0.7708, "step": 5220 }, { "epoch": 0.28, "learning_rate": 4.8936934871846854e-05, "loss": 0.8733, "step": 5225 }, { "epoch": 0.28, "learning_rate": 4.8934913515029703e-05, "loss": 0.8391, "step": 5230 }, { "epoch": 0.28, "learning_rate": 4.893289028011755e-05, "loss": 0.7536, "step": 5235 }, { "epoch": 0.28, "learning_rate": 4.8930865167269126e-05, "loss": 0.8303, "step": 5240 }, { "epoch": 0.28, "learning_rate": 4.8928838176643357e-05, "loss": 0.8649, "step": 5245 }, { "epoch": 0.28, "learning_rate": 4.892680930839929e-05, "loss": 0.8889, "step": 5250 }, { "epoch": 0.28, "learning_rate": 4.892477856269611e-05, "loss": 0.9678, "step": 5255 }, { "epoch": 0.28, "learning_rate": 4.892274593969318e-05, "loss": 0.7796, "step": 5260 }, { "epoch": 0.28, "learning_rate": 4.8920711439549985e-05, "loss": 0.6292, "step": 5265 }, { "epoch": 0.28, "learning_rate": 4.891867506242617e-05, "loss": 0.9088, "step": 5270 }, { "epoch": 0.28, "learning_rate": 4.8916636808481514e-05, "loss": 0.8349, "step": 5275 }, { "epoch": 0.28, "learning_rate": 4.8914596677875965e-05, "loss": 0.6519, "step": 5280 }, { "epoch": 0.28, "learning_rate": 4.8912554670769596e-05, "loss": 0.8597, "step": 5285 }, { "epoch": 0.28, "learning_rate": 4.891051078732263e-05, "loss": 0.7662, "step": 5290 }, { "epoch": 0.28, "learning_rate": 4.890846502769547e-05, "loss": 0.789, "step": 5295 }, { "epoch": 0.28, "learning_rate": 4.8906417392048606e-05, "loss": 0.8971, "step": 5300 }, { "epoch": 0.28, "learning_rate": 4.890436788054274e-05, "loss": 0.8498, "step": 5305 }, { "epoch": 0.28, "learning_rate": 4.890231649333867e-05, "loss": 0.854, "step": 5310 }, { "epoch": 0.28, "learning_rate": 4.8900263230597377e-05, "loss": 0.9026, "step": 5315 }, { "epoch": 0.28, "learning_rate": 4.889820809247996e-05, "loss": 0.7707, "step": 5320 }, { "epoch": 0.28, "learning_rate": 4.889615107914768e-05, "loss": 0.8193, "step": 5325 }, { "epoch": 0.29, "learning_rate": 4.889409219076195e-05, "loss": 0.8653, "step": 5330 }, { "epoch": 0.29, "learning_rate": 4.889203142748433e-05, "loss": 0.848, "step": 5335 }, { "epoch": 0.29, "learning_rate": 4.888996878947652e-05, "loss": 0.8134, "step": 5340 }, { "epoch": 0.29, "learning_rate": 4.888790427690035e-05, "loss": 0.6155, "step": 5345 }, { "epoch": 0.29, "learning_rate": 4.888583788991783e-05, "loss": 0.8795, "step": 5350 }, { "epoch": 0.29, "learning_rate": 4.8883769628691106e-05, "loss": 0.8331, "step": 5355 }, { "epoch": 0.29, "learning_rate": 4.888169949338246e-05, "loss": 0.7025, "step": 5360 }, { "epoch": 0.29, "learning_rate": 4.8879627484154335e-05, "loss": 0.8499, "step": 5365 }, { "epoch": 0.29, "learning_rate": 4.8877553601169314e-05, "loss": 0.7206, "step": 5370 }, { "epoch": 0.29, "learning_rate": 4.8875477844590125e-05, "loss": 1.0382, "step": 5375 }, { "epoch": 0.29, "learning_rate": 4.8873400214579634e-05, "loss": 0.7875, "step": 5380 }, { "epoch": 0.29, "learning_rate": 4.8871320711300893e-05, "loss": 0.7156, "step": 5385 }, { "epoch": 0.29, "learning_rate": 4.8869239334917054e-05, "loss": 0.9238, "step": 5390 }, { "epoch": 0.29, "learning_rate": 4.8867156085591444e-05, "loss": 0.6847, "step": 5395 }, { "epoch": 0.29, "learning_rate": 4.886507096348752e-05, "loss": 0.6943, "step": 5400 }, { "epoch": 0.29, "learning_rate": 4.8862983968768905e-05, "loss": 0.8144, "step": 5405 }, { "epoch": 0.29, "learning_rate": 4.886089510159936e-05, "loss": 0.8644, "step": 5410 }, { "epoch": 0.29, "learning_rate": 4.8858804362142775e-05, "loss": 0.8313, "step": 5415 }, { "epoch": 0.29, "learning_rate": 4.885671175056322e-05, "loss": 0.6607, "step": 5420 }, { "epoch": 0.29, "learning_rate": 4.8854617267024885e-05, "loss": 0.8752, "step": 5425 }, { "epoch": 0.29, "learning_rate": 4.885252091169214e-05, "loss": 0.7901, "step": 5430 }, { "epoch": 0.29, "learning_rate": 4.8850422684729436e-05, "loss": 0.9387, "step": 5435 }, { "epoch": 0.29, "learning_rate": 4.884832258630145e-05, "loss": 0.7988, "step": 5440 }, { "epoch": 0.29, "learning_rate": 4.884622061657297e-05, "loss": 0.7975, "step": 5445 }, { "epoch": 0.29, "learning_rate": 4.884411677570891e-05, "loss": 0.9318, "step": 5450 }, { "epoch": 0.29, "learning_rate": 4.884201106387436e-05, "loss": 0.8113, "step": 5455 }, { "epoch": 0.29, "learning_rate": 4.883990348123456e-05, "loss": 0.8999, "step": 5460 }, { "epoch": 0.29, "learning_rate": 4.8837794027954864e-05, "loss": 0.8144, "step": 5465 }, { "epoch": 0.29, "learning_rate": 4.883568270420081e-05, "loss": 0.6924, "step": 5470 }, { "epoch": 0.29, "learning_rate": 4.883356951013807e-05, "loss": 0.7643, "step": 5475 }, { "epoch": 0.29, "learning_rate": 4.8831454445932444e-05, "loss": 0.8673, "step": 5480 }, { "epoch": 0.29, "learning_rate": 4.88293375117499e-05, "loss": 0.7358, "step": 5485 }, { "epoch": 0.29, "learning_rate": 4.882721870775655e-05, "loss": 0.8188, "step": 5490 }, { "epoch": 0.29, "learning_rate": 4.882509803411865e-05, "loss": 0.7592, "step": 5495 }, { "epoch": 0.29, "learning_rate": 4.8822975491002606e-05, "loss": 0.8642, "step": 5500 }, { "epoch": 0.29, "learning_rate": 4.8820851078574955e-05, "loss": 0.8981, "step": 5505 }, { "epoch": 0.29, "learning_rate": 4.8818724797002404e-05, "loss": 0.7225, "step": 5510 }, { "epoch": 0.3, "learning_rate": 4.881659664645178e-05, "loss": 0.7061, "step": 5515 }, { "epoch": 0.3, "learning_rate": 4.8814466627090094e-05, "loss": 0.7144, "step": 5520 }, { "epoch": 0.3, "learning_rate": 4.881233473908447e-05, "loss": 0.774, "step": 5525 }, { "epoch": 0.3, "learning_rate": 4.8810200982602186e-05, "loss": 0.9191, "step": 5530 }, { "epoch": 0.3, "learning_rate": 4.880806535781069e-05, "loss": 0.7728, "step": 5535 }, { "epoch": 0.3, "learning_rate": 4.8805927864877526e-05, "loss": 0.7725, "step": 5540 }, { "epoch": 0.3, "learning_rate": 4.880378850397044e-05, "loss": 0.8261, "step": 5545 }, { "epoch": 0.3, "learning_rate": 4.88016472752573e-05, "loss": 0.7638, "step": 5550 }, { "epoch": 0.3, "learning_rate": 4.879950417890611e-05, "loss": 0.8227, "step": 5555 }, { "epoch": 0.3, "learning_rate": 4.879735921508504e-05, "loss": 0.9262, "step": 5560 }, { "epoch": 0.3, "learning_rate": 4.8795212383962396e-05, "loss": 0.8089, "step": 5565 }, { "epoch": 0.3, "learning_rate": 4.879306368570663e-05, "loss": 0.8948, "step": 5570 }, { "epoch": 0.3, "learning_rate": 4.879091312048635e-05, "loss": 0.7509, "step": 5575 }, { "epoch": 0.3, "learning_rate": 4.878876068847029e-05, "loss": 0.8049, "step": 5580 }, { "epoch": 0.3, "learning_rate": 4.8786606389827366e-05, "loss": 0.694, "step": 5585 }, { "epoch": 0.3, "learning_rate": 4.8784450224726606e-05, "loss": 0.7351, "step": 5590 }, { "epoch": 0.3, "learning_rate": 4.8782292193337195e-05, "loss": 0.8148, "step": 5595 }, { "epoch": 0.3, "learning_rate": 4.878013229582847e-05, "loss": 0.7501, "step": 5600 }, { "epoch": 0.3, "learning_rate": 4.8777970532369915e-05, "loss": 0.7474, "step": 5605 }, { "epoch": 0.3, "learning_rate": 4.877580690313115e-05, "loss": 0.9247, "step": 5610 }, { "epoch": 0.3, "learning_rate": 4.877364140828196e-05, "loss": 0.8913, "step": 5615 }, { "epoch": 0.3, "learning_rate": 4.877147404799224e-05, "loss": 0.8387, "step": 5620 }, { "epoch": 0.3, "learning_rate": 4.876930482243208e-05, "loss": 0.7817, "step": 5625 }, { "epoch": 0.3, "learning_rate": 4.876713373177168e-05, "loss": 0.7441, "step": 5630 }, { "epoch": 0.3, "learning_rate": 4.87649607761814e-05, "loss": 0.806, "step": 5635 }, { "epoch": 0.3, "learning_rate": 4.876278595583176e-05, "loss": 0.6691, "step": 5640 }, { "epoch": 0.3, "learning_rate": 4.876060927089338e-05, "loss": 0.918, "step": 5645 }, { "epoch": 0.3, "learning_rate": 4.875843072153709e-05, "loss": 0.9452, "step": 5650 }, { "epoch": 0.3, "learning_rate": 4.87562503079338e-05, "loss": 0.7853, "step": 5655 }, { "epoch": 0.3, "learning_rate": 4.875406803025463e-05, "loss": 0.7425, "step": 5660 }, { "epoch": 0.3, "learning_rate": 4.875188388867081e-05, "loss": 0.8371, "step": 5665 }, { "epoch": 0.3, "learning_rate": 4.87496978833537e-05, "loss": 0.7431, "step": 5670 }, { "epoch": 0.3, "learning_rate": 4.874751001447486e-05, "loss": 0.8835, "step": 5675 }, { "epoch": 0.3, "learning_rate": 4.874532028220595e-05, "loss": 0.8855, "step": 5680 }, { "epoch": 0.3, "learning_rate": 4.8743128686718797e-05, "loss": 0.8672, "step": 5685 }, { "epoch": 0.3, "learning_rate": 4.874093522818536e-05, "loss": 0.9386, "step": 5690 }, { "epoch": 0.3, "learning_rate": 4.873873990677775e-05, "loss": 0.9365, "step": 5695 }, { "epoch": 0.3, "learning_rate": 4.8736542722668234e-05, "loss": 0.8297, "step": 5700 }, { "epoch": 0.31, "learning_rate": 4.8734343676029216e-05, "loss": 0.7469, "step": 5705 }, { "epoch": 0.31, "learning_rate": 4.873214276703325e-05, "loss": 0.9627, "step": 5710 }, { "epoch": 0.31, "learning_rate": 4.8729939995853034e-05, "loss": 0.7226, "step": 5715 }, { "epoch": 0.31, "learning_rate": 4.872773536266142e-05, "loss": 0.8238, "step": 5720 }, { "epoch": 0.31, "learning_rate": 4.8725528867631375e-05, "loss": 0.8192, "step": 5725 }, { "epoch": 0.31, "learning_rate": 4.8723320510936064e-05, "loss": 0.8981, "step": 5730 }, { "epoch": 0.31, "learning_rate": 4.872111029274875e-05, "loss": 0.7524, "step": 5735 }, { "epoch": 0.31, "learning_rate": 4.871889821324286e-05, "loss": 0.6757, "step": 5740 }, { "epoch": 0.31, "learning_rate": 4.8716684272591984e-05, "loss": 0.9226, "step": 5745 }, { "epoch": 0.31, "learning_rate": 4.8714468470969836e-05, "loss": 0.9441, "step": 5750 }, { "epoch": 0.31, "learning_rate": 4.871225080855028e-05, "loss": 0.8892, "step": 5755 }, { "epoch": 0.31, "learning_rate": 4.8710031285507326e-05, "loss": 0.8726, "step": 5760 }, { "epoch": 0.31, "learning_rate": 4.870780990201514e-05, "loss": 0.6471, "step": 5765 }, { "epoch": 0.31, "learning_rate": 4.870558665824803e-05, "loss": 0.9857, "step": 5770 }, { "epoch": 0.31, "learning_rate": 4.870336155438044e-05, "loss": 0.8579, "step": 5775 }, { "epoch": 0.31, "learning_rate": 4.870113459058696e-05, "loss": 0.8928, "step": 5780 }, { "epoch": 0.31, "learning_rate": 4.869890576704234e-05, "loss": 0.6858, "step": 5785 }, { "epoch": 0.31, "learning_rate": 4.869667508392148e-05, "loss": 0.7689, "step": 5790 }, { "epoch": 0.31, "learning_rate": 4.86944425413994e-05, "loss": 0.9229, "step": 5795 }, { "epoch": 0.31, "learning_rate": 4.8692208139651274e-05, "loss": 0.7077, "step": 5800 }, { "epoch": 0.31, "learning_rate": 4.868997187885244e-05, "loss": 0.9604, "step": 5805 }, { "epoch": 0.31, "learning_rate": 4.868773375917837e-05, "loss": 0.8189, "step": 5810 }, { "epoch": 0.31, "learning_rate": 4.868549378080468e-05, "loss": 0.6778, "step": 5815 }, { "epoch": 0.31, "learning_rate": 4.868325194390714e-05, "loss": 0.8863, "step": 5820 }, { "epoch": 0.31, "learning_rate": 4.8681008248661645e-05, "loss": 0.7959, "step": 5825 }, { "epoch": 0.31, "learning_rate": 4.8678762695244265e-05, "loss": 0.7375, "step": 5830 }, { "epoch": 0.31, "learning_rate": 4.8676515283831195e-05, "loss": 0.7799, "step": 5835 }, { "epoch": 0.31, "learning_rate": 4.867426601459877e-05, "loss": 0.8646, "step": 5840 }, { "epoch": 0.31, "learning_rate": 4.8672014887723505e-05, "loss": 0.7989, "step": 5845 }, { "epoch": 0.31, "learning_rate": 4.866976190338203e-05, "loss": 0.7567, "step": 5850 }, { "epoch": 0.31, "learning_rate": 4.866750706175113e-05, "loss": 0.8682, "step": 5855 }, { "epoch": 0.31, "learning_rate": 4.8665250363007726e-05, "loss": 0.9152, "step": 5860 }, { "epoch": 0.31, "learning_rate": 4.866299180732891e-05, "loss": 0.8303, "step": 5865 }, { "epoch": 0.31, "learning_rate": 4.866073139489188e-05, "loss": 0.8081, "step": 5870 }, { "epoch": 0.31, "learning_rate": 4.8658469125874026e-05, "loss": 0.7795, "step": 5875 }, { "epoch": 0.31, "learning_rate": 4.8656205000452855e-05, "loss": 0.9127, "step": 5880 }, { "epoch": 0.31, "learning_rate": 4.8653939018806014e-05, "loss": 0.9329, "step": 5885 }, { "epoch": 0.32, "learning_rate": 4.8651671181111325e-05, "loss": 0.7977, "step": 5890 }, { "epoch": 0.32, "learning_rate": 4.864940148754673e-05, "loss": 0.8458, "step": 5895 }, { "epoch": 0.32, "learning_rate": 4.864712993829033e-05, "loss": 0.6276, "step": 5900 }, { "epoch": 0.32, "learning_rate": 4.864485653352034e-05, "loss": 0.8415, "step": 5905 }, { "epoch": 0.32, "learning_rate": 4.8642581273415183e-05, "loss": 0.6388, "step": 5910 }, { "epoch": 0.32, "learning_rate": 4.864030415815337e-05, "loss": 0.9421, "step": 5915 }, { "epoch": 0.32, "learning_rate": 4.863802518791358e-05, "loss": 0.9388, "step": 5920 }, { "epoch": 0.32, "learning_rate": 4.863574436287465e-05, "loss": 0.6694, "step": 5925 }, { "epoch": 0.32, "learning_rate": 4.863346168321553e-05, "loss": 0.9516, "step": 5930 }, { "epoch": 0.32, "learning_rate": 4.8631177149115346e-05, "loss": 0.9281, "step": 5935 }, { "epoch": 0.32, "learning_rate": 4.862889076075336e-05, "loss": 0.8082, "step": 5940 }, { "epoch": 0.32, "learning_rate": 4.862660251830897e-05, "loss": 0.8666, "step": 5945 }, { "epoch": 0.32, "learning_rate": 4.862431242196173e-05, "loss": 0.8533, "step": 5950 }, { "epoch": 0.32, "learning_rate": 4.8622020471891336e-05, "loss": 0.9231, "step": 5955 }, { "epoch": 0.32, "learning_rate": 4.861972666827763e-05, "loss": 0.6603, "step": 5960 }, { "epoch": 0.32, "learning_rate": 4.8617431011300596e-05, "loss": 0.7848, "step": 5965 }, { "epoch": 0.32, "learning_rate": 4.8615133501140374e-05, "loss": 0.8393, "step": 5970 }, { "epoch": 0.32, "learning_rate": 4.861283413797724e-05, "loss": 0.9132, "step": 5975 }, { "epoch": 0.32, "learning_rate": 4.8610532921991615e-05, "loss": 0.8903, "step": 5980 }, { "epoch": 0.32, "learning_rate": 4.860822985336407e-05, "loss": 0.8436, "step": 5985 }, { "epoch": 0.32, "learning_rate": 4.8605924932275315e-05, "loss": 0.8425, "step": 5990 }, { "epoch": 0.32, "learning_rate": 4.860361815890622e-05, "loss": 0.8307, "step": 5995 }, { "epoch": 0.32, "learning_rate": 4.860130953343778e-05, "loss": 0.8491, "step": 6000 }, { "epoch": 0.32, "learning_rate": 4.859899905605114e-05, "loss": 0.7904, "step": 6005 }, { "epoch": 0.32, "learning_rate": 4.8596686726927616e-05, "loss": 0.8774, "step": 6010 }, { "epoch": 0.32, "learning_rate": 4.8594372546248634e-05, "loss": 0.7607, "step": 6015 }, { "epoch": 0.32, "learning_rate": 4.859205651419578e-05, "loss": 0.8349, "step": 6020 }, { "epoch": 0.32, "learning_rate": 4.858973863095079e-05, "loss": 0.7619, "step": 6025 }, { "epoch": 0.32, "learning_rate": 4.858741889669554e-05, "loss": 0.7539, "step": 6030 }, { "epoch": 0.32, "learning_rate": 4.8585097311612046e-05, "loss": 0.8129, "step": 6035 }, { "epoch": 0.32, "learning_rate": 4.8582773875882494e-05, "loss": 0.8613, "step": 6040 }, { "epoch": 0.32, "learning_rate": 4.858044858968918e-05, "loss": 0.6603, "step": 6045 }, { "epoch": 0.32, "learning_rate": 4.857812145321455e-05, "loss": 1.1471, "step": 6050 }, { "epoch": 0.32, "learning_rate": 4.857579246664123e-05, "loss": 0.9696, "step": 6055 }, { "epoch": 0.32, "learning_rate": 4.8573461630151975e-05, "loss": 0.8304, "step": 6060 }, { "epoch": 0.32, "learning_rate": 4.8571128943929644e-05, "loss": 0.9174, "step": 6065 }, { "epoch": 0.32, "learning_rate": 4.8568794408157305e-05, "loss": 0.7374, "step": 6070 }, { "epoch": 0.32, "learning_rate": 4.856645802301812e-05, "loss": 0.7429, "step": 6075 }, { "epoch": 0.33, "learning_rate": 4.856411978869544e-05, "loss": 0.654, "step": 6080 }, { "epoch": 0.33, "learning_rate": 4.856177970537272e-05, "loss": 0.7194, "step": 6085 }, { "epoch": 0.33, "learning_rate": 4.85594377732336e-05, "loss": 0.8306, "step": 6090 }, { "epoch": 0.33, "learning_rate": 4.855709399246181e-05, "loss": 0.7886, "step": 6095 }, { "epoch": 0.33, "learning_rate": 4.8554748363241286e-05, "loss": 0.8086, "step": 6100 }, { "epoch": 0.33, "learning_rate": 4.8552400885756074e-05, "loss": 0.5731, "step": 6105 }, { "epoch": 0.33, "learning_rate": 4.8550051560190377e-05, "loss": 0.8254, "step": 6110 }, { "epoch": 0.33, "learning_rate": 4.8547700386728534e-05, "loss": 0.7931, "step": 6115 }, { "epoch": 0.33, "learning_rate": 4.854534736555504e-05, "loss": 0.8459, "step": 6120 }, { "epoch": 0.33, "learning_rate": 4.854299249685451e-05, "loss": 0.825, "step": 6125 }, { "epoch": 0.33, "learning_rate": 4.854063578081175e-05, "loss": 0.7507, "step": 6130 }, { "epoch": 0.33, "learning_rate": 4.853827721761167e-05, "loss": 0.7269, "step": 6135 }, { "epoch": 0.33, "learning_rate": 4.853591680743933e-05, "loss": 0.8861, "step": 6140 }, { "epoch": 0.33, "learning_rate": 4.853355455047996e-05, "loss": 0.9874, "step": 6145 }, { "epoch": 0.33, "learning_rate": 4.8531190446918916e-05, "loss": 0.7483, "step": 6150 }, { "epoch": 0.33, "learning_rate": 4.8528824496941695e-05, "loss": 0.8492, "step": 6155 }, { "epoch": 0.33, "learning_rate": 4.852645670073395e-05, "loss": 0.7936, "step": 6160 }, { "epoch": 0.33, "learning_rate": 4.852408705848146e-05, "loss": 0.8202, "step": 6165 }, { "epoch": 0.33, "learning_rate": 4.852171557037019e-05, "loss": 0.7985, "step": 6170 }, { "epoch": 0.33, "learning_rate": 4.85193422365862e-05, "loss": 0.8009, "step": 6175 }, { "epoch": 0.33, "learning_rate": 4.851696705731573e-05, "loss": 0.8705, "step": 6180 }, { "epoch": 0.33, "learning_rate": 4.851459003274515e-05, "loss": 0.8157, "step": 6185 }, { "epoch": 0.33, "learning_rate": 4.8512211163060974e-05, "loss": 0.8059, "step": 6190 }, { "epoch": 0.33, "learning_rate": 4.850983044844987e-05, "loss": 0.9143, "step": 6195 }, { "epoch": 0.33, "learning_rate": 4.8507447889098634e-05, "loss": 0.9241, "step": 6200 }, { "epoch": 0.33, "learning_rate": 4.850506348519424e-05, "loss": 0.7165, "step": 6205 }, { "epoch": 0.33, "learning_rate": 4.850267723692376e-05, "loss": 0.8503, "step": 6210 }, { "epoch": 0.33, "learning_rate": 4.8500289144474436e-05, "loss": 0.7503, "step": 6215 }, { "epoch": 0.33, "learning_rate": 4.849789920803367e-05, "loss": 0.959, "step": 6220 }, { "epoch": 0.33, "learning_rate": 4.849550742778899e-05, "loss": 0.8166, "step": 6225 }, { "epoch": 0.33, "learning_rate": 4.849311380392806e-05, "loss": 0.7547, "step": 6230 }, { "epoch": 0.33, "learning_rate": 4.8490718336638705e-05, "loss": 0.7028, "step": 6235 }, { "epoch": 0.33, "learning_rate": 4.84883210261089e-05, "loss": 0.9626, "step": 6240 }, { "epoch": 0.33, "learning_rate": 4.848592187252674e-05, "loss": 0.7794, "step": 6245 }, { "epoch": 0.33, "learning_rate": 4.8483520876080484e-05, "loss": 0.9036, "step": 6250 }, { "epoch": 0.33, "learning_rate": 4.848111803695853e-05, "loss": 0.8925, "step": 6255 }, { "epoch": 0.33, "learning_rate": 4.8478713355349426e-05, "loss": 0.8219, "step": 6260 }, { "epoch": 0.34, "learning_rate": 4.847630683144185e-05, "loss": 0.7414, "step": 6265 }, { "epoch": 0.34, "learning_rate": 4.847389846542464e-05, "loss": 0.8704, "step": 6270 }, { "epoch": 0.34, "learning_rate": 4.8471488257486766e-05, "loss": 0.7168, "step": 6275 }, { "epoch": 0.34, "learning_rate": 4.846907620781736e-05, "loss": 0.854, "step": 6280 }, { "epoch": 0.34, "learning_rate": 4.846666231660568e-05, "loss": 0.8836, "step": 6285 }, { "epoch": 0.34, "learning_rate": 4.8464246584041145e-05, "loss": 0.9883, "step": 6290 }, { "epoch": 0.34, "learning_rate": 4.84618290103133e-05, "loss": 0.752, "step": 6295 }, { "epoch": 0.34, "learning_rate": 4.845940959561185e-05, "loss": 0.7937, "step": 6300 }, { "epoch": 0.34, "learning_rate": 4.845698834012663e-05, "loss": 1.0104, "step": 6305 }, { "epoch": 0.34, "learning_rate": 4.8454565244047634e-05, "loss": 0.7503, "step": 6310 }, { "epoch": 0.34, "learning_rate": 4.8452140307565e-05, "loss": 0.7292, "step": 6315 }, { "epoch": 0.34, "learning_rate": 4.8449713530869e-05, "loss": 0.9591, "step": 6320 }, { "epoch": 0.34, "learning_rate": 4.844728491415005e-05, "loss": 0.7253, "step": 6325 }, { "epoch": 0.34, "learning_rate": 4.844485445759872e-05, "loss": 0.6955, "step": 6330 }, { "epoch": 0.34, "learning_rate": 4.844242216140573e-05, "loss": 0.8349, "step": 6335 }, { "epoch": 0.34, "learning_rate": 4.8439988025761915e-05, "loss": 0.7472, "step": 6340 }, { "epoch": 0.34, "learning_rate": 4.843755205085829e-05, "loss": 0.8337, "step": 6345 }, { "epoch": 0.34, "learning_rate": 4.843511423688599e-05, "loss": 0.768, "step": 6350 }, { "epoch": 0.34, "learning_rate": 4.843267458403631e-05, "loss": 0.7714, "step": 6355 }, { "epoch": 0.34, "learning_rate": 4.8430233092500666e-05, "loss": 0.7844, "step": 6360 }, { "epoch": 0.34, "learning_rate": 4.8427789762470654e-05, "loss": 0.9785, "step": 6365 }, { "epoch": 0.34, "learning_rate": 4.842534459413798e-05, "loss": 0.8883, "step": 6370 }, { "epoch": 0.34, "learning_rate": 4.8422897587694503e-05, "loss": 0.7626, "step": 6375 }, { "epoch": 0.34, "learning_rate": 4.842044874333225e-05, "loss": 0.726, "step": 6380 }, { "epoch": 0.34, "learning_rate": 4.8417998061243366e-05, "loss": 0.6785, "step": 6385 }, { "epoch": 0.34, "learning_rate": 4.841554554162014e-05, "loss": 0.8227, "step": 6390 }, { "epoch": 0.34, "learning_rate": 4.841309118465503e-05, "loss": 0.8717, "step": 6395 }, { "epoch": 0.34, "learning_rate": 4.84106349905406e-05, "loss": 0.551, "step": 6400 }, { "epoch": 0.34, "learning_rate": 4.84081769594696e-05, "loss": 0.8788, "step": 6405 }, { "epoch": 0.34, "learning_rate": 4.8405717091634894e-05, "loss": 0.64, "step": 6410 }, { "epoch": 0.34, "learning_rate": 4.8403255387229496e-05, "loss": 0.8521, "step": 6415 }, { "epoch": 0.34, "learning_rate": 4.840079184644658e-05, "loss": 0.8083, "step": 6420 }, { "epoch": 0.34, "learning_rate": 4.839832646947944e-05, "loss": 0.707, "step": 6425 }, { "epoch": 0.34, "learning_rate": 4.839585925652153e-05, "loss": 0.7592, "step": 6430 }, { "epoch": 0.34, "learning_rate": 4.839339020776645e-05, "loss": 0.8081, "step": 6435 }, { "epoch": 0.34, "learning_rate": 4.839091932340793e-05, "loss": 0.7157, "step": 6440 }, { "epoch": 0.34, "learning_rate": 4.838844660363985e-05, "loss": 0.9523, "step": 6445 }, { "epoch": 0.35, "learning_rate": 4.838597204865625e-05, "loss": 0.8926, "step": 6450 }, { "epoch": 0.35, "learning_rate": 4.838349565865129e-05, "loss": 0.7616, "step": 6455 }, { "epoch": 0.35, "learning_rate": 4.8381017433819284e-05, "loss": 0.7135, "step": 6460 }, { "epoch": 0.35, "learning_rate": 4.837853737435471e-05, "loss": 0.79, "step": 6465 }, { "epoch": 0.35, "learning_rate": 4.8376055480452134e-05, "loss": 0.7665, "step": 6470 }, { "epoch": 0.35, "learning_rate": 4.8373571752306327e-05, "loss": 0.9513, "step": 6475 }, { "epoch": 0.35, "learning_rate": 4.837108619011217e-05, "loss": 0.7946, "step": 6480 }, { "epoch": 0.35, "learning_rate": 4.83685987940647e-05, "loss": 0.7588, "step": 6485 }, { "epoch": 0.35, "learning_rate": 4.8366109564359095e-05, "loss": 0.8488, "step": 6490 }, { "epoch": 0.35, "learning_rate": 4.836361850119069e-05, "loss": 0.7705, "step": 6495 }, { "epoch": 0.35, "learning_rate": 4.836112560475492e-05, "loss": 0.8837, "step": 6500 }, { "epoch": 0.35, "learning_rate": 4.835863087524742e-05, "loss": 0.7212, "step": 6505 }, { "epoch": 0.35, "learning_rate": 4.835613431286394e-05, "loss": 0.8228, "step": 6510 }, { "epoch": 0.35, "learning_rate": 4.835363591780037e-05, "loss": 0.8566, "step": 6515 }, { "epoch": 0.35, "learning_rate": 4.835113569025275e-05, "loss": 0.7953, "step": 6520 }, { "epoch": 0.35, "learning_rate": 4.8348633630417265e-05, "loss": 0.8232, "step": 6525 }, { "epoch": 0.35, "learning_rate": 4.8346129738490256e-05, "loss": 0.8565, "step": 6530 }, { "epoch": 0.35, "learning_rate": 4.834362401466818e-05, "loss": 0.8472, "step": 6535 }, { "epoch": 0.35, "learning_rate": 4.834111645914766e-05, "loss": 0.8275, "step": 6540 }, { "epoch": 0.35, "learning_rate": 4.833860707212545e-05, "loss": 0.7908, "step": 6545 }, { "epoch": 0.35, "learning_rate": 4.833609585379847e-05, "loss": 0.7838, "step": 6550 }, { "epoch": 0.35, "learning_rate": 4.8333582804363756e-05, "loss": 0.7953, "step": 6555 }, { "epoch": 0.35, "learning_rate": 4.833106792401848e-05, "loss": 0.7829, "step": 6560 }, { "epoch": 0.35, "learning_rate": 4.832855121296001e-05, "loss": 0.8694, "step": 6565 }, { "epoch": 0.35, "learning_rate": 4.8326032671385804e-05, "loss": 0.7641, "step": 6570 }, { "epoch": 0.35, "learning_rate": 4.8323512299493495e-05, "loss": 0.8141, "step": 6575 }, { "epoch": 0.35, "learning_rate": 4.8320990097480834e-05, "loss": 0.7804, "step": 6580 }, { "epoch": 0.35, "learning_rate": 4.831846606554574e-05, "loss": 0.8158, "step": 6585 }, { "epoch": 0.35, "learning_rate": 4.831594020388627e-05, "loss": 0.9951, "step": 6590 }, { "epoch": 0.35, "learning_rate": 4.83134125127006e-05, "loss": 0.8265, "step": 6595 }, { "epoch": 0.35, "learning_rate": 4.83108829921871e-05, "loss": 0.7968, "step": 6600 }, { "epoch": 0.35, "learning_rate": 4.830835164254424e-05, "loss": 0.7983, "step": 6605 }, { "epoch": 0.35, "learning_rate": 4.830581846397063e-05, "loss": 0.7525, "step": 6610 }, { "epoch": 0.35, "learning_rate": 4.8303283456665056e-05, "loss": 0.8614, "step": 6615 }, { "epoch": 0.35, "learning_rate": 4.830074662082643e-05, "loss": 0.7994, "step": 6620 }, { "epoch": 0.35, "learning_rate": 4.8298207956653814e-05, "loss": 0.7816, "step": 6625 }, { "epoch": 0.35, "learning_rate": 4.82956674643464e-05, "loss": 0.9112, "step": 6630 }, { "epoch": 0.35, "learning_rate": 4.8293125144103535e-05, "loss": 0.7195, "step": 6635 }, { "epoch": 0.36, "learning_rate": 4.829058099612471e-05, "loss": 0.8317, "step": 6640 }, { "epoch": 0.36, "learning_rate": 4.828803502060956e-05, "loss": 0.7947, "step": 6645 }, { "epoch": 0.36, "learning_rate": 4.828548721775784e-05, "loss": 0.7425, "step": 6650 }, { "epoch": 0.36, "learning_rate": 4.828293758776949e-05, "loss": 0.8062, "step": 6655 }, { "epoch": 0.36, "learning_rate": 4.8280386130844555e-05, "loss": 0.8531, "step": 6660 }, { "epoch": 0.36, "learning_rate": 4.827783284718326e-05, "loss": 0.87, "step": 6665 }, { "epoch": 0.36, "learning_rate": 4.8275277736985924e-05, "loss": 0.773, "step": 6670 }, { "epoch": 0.36, "learning_rate": 4.8272720800453065e-05, "loss": 0.8636, "step": 6675 }, { "epoch": 0.36, "learning_rate": 4.82701620377853e-05, "loss": 0.8619, "step": 6680 }, { "epoch": 0.36, "learning_rate": 4.8267601449183416e-05, "loss": 0.7709, "step": 6685 }, { "epoch": 0.36, "learning_rate": 4.8265039034848324e-05, "loss": 0.7648, "step": 6690 }, { "epoch": 0.36, "learning_rate": 4.82624747949811e-05, "loss": 0.8286, "step": 6695 }, { "epoch": 0.36, "learning_rate": 4.825990872978296e-05, "loss": 0.8407, "step": 6700 }, { "epoch": 0.36, "learning_rate": 4.825734083945523e-05, "loss": 0.847, "step": 6705 }, { "epoch": 0.36, "learning_rate": 4.8254771124199416e-05, "loss": 0.845, "step": 6710 }, { "epoch": 0.36, "learning_rate": 4.825219958421715e-05, "loss": 0.8051, "step": 6715 }, { "epoch": 0.36, "learning_rate": 4.8249626219710234e-05, "loss": 0.757, "step": 6720 }, { "epoch": 0.36, "learning_rate": 4.8247051030880565e-05, "loss": 0.6884, "step": 6725 }, { "epoch": 0.36, "learning_rate": 4.8244474017930215e-05, "loss": 0.9335, "step": 6730 }, { "epoch": 0.36, "learning_rate": 4.82418951810614e-05, "loss": 0.7192, "step": 6735 }, { "epoch": 0.36, "learning_rate": 4.823931452047647e-05, "loss": 0.8617, "step": 6740 }, { "epoch": 0.36, "learning_rate": 4.823673203637794e-05, "loss": 0.8135, "step": 6745 }, { "epoch": 0.36, "learning_rate": 4.8234147728968414e-05, "loss": 0.853, "step": 6750 }, { "epoch": 0.36, "learning_rate": 4.8231561598450706e-05, "loss": 0.7494, "step": 6755 }, { "epoch": 0.36, "learning_rate": 4.8228973645027713e-05, "loss": 0.7688, "step": 6760 }, { "epoch": 0.36, "learning_rate": 4.822638386890253e-05, "loss": 0.7991, "step": 6765 }, { "epoch": 0.36, "learning_rate": 4.822379227027834e-05, "loss": 0.826, "step": 6770 }, { "epoch": 0.36, "learning_rate": 4.8221198849358527e-05, "loss": 0.8499, "step": 6775 }, { "epoch": 0.36, "learning_rate": 4.821860360634657e-05, "loss": 0.7452, "step": 6780 }, { "epoch": 0.36, "learning_rate": 4.8216006541446117e-05, "loss": 0.7455, "step": 6785 }, { "epoch": 0.36, "learning_rate": 4.8213407654860945e-05, "loss": 0.7513, "step": 6790 }, { "epoch": 0.36, "learning_rate": 4.821080694679498e-05, "loss": 0.8145, "step": 6795 }, { "epoch": 0.36, "learning_rate": 4.820820441745231e-05, "loss": 0.8254, "step": 6800 }, { "epoch": 0.36, "learning_rate": 4.820560006703711e-05, "loss": 0.7937, "step": 6805 }, { "epoch": 0.36, "learning_rate": 4.820299389575378e-05, "loss": 0.7425, "step": 6810 }, { "epoch": 0.36, "learning_rate": 4.820038590380678e-05, "loss": 0.8957, "step": 6815 }, { "epoch": 0.36, "learning_rate": 4.8197776091400766e-05, "loss": 0.8821, "step": 6820 }, { "epoch": 0.37, "learning_rate": 4.819516445874052e-05, "loss": 0.7784, "step": 6825 }, { "epoch": 0.37, "learning_rate": 4.819255100603097e-05, "loss": 0.618, "step": 6830 }, { "epoch": 0.37, "learning_rate": 4.818993573347719e-05, "loss": 0.8112, "step": 6835 }, { "epoch": 0.37, "learning_rate": 4.8187318641284376e-05, "loss": 0.9088, "step": 6840 }, { "epoch": 0.37, "learning_rate": 4.8184699729657895e-05, "loss": 0.8004, "step": 6845 }, { "epoch": 0.37, "learning_rate": 4.818207899880325e-05, "loss": 0.8575, "step": 6850 }, { "epoch": 0.37, "learning_rate": 4.817945644892606e-05, "loss": 0.7702, "step": 6855 }, { "epoch": 0.37, "learning_rate": 4.817683208023213e-05, "loss": 0.9045, "step": 6860 }, { "epoch": 0.37, "learning_rate": 4.817420589292738e-05, "loss": 0.8171, "step": 6865 }, { "epoch": 0.37, "learning_rate": 4.8171577887217866e-05, "loss": 0.9869, "step": 6870 }, { "epoch": 0.37, "learning_rate": 4.8168948063309814e-05, "loss": 0.8393, "step": 6875 }, { "epoch": 0.37, "learning_rate": 4.8166316421409574e-05, "loss": 0.8222, "step": 6880 }, { "epoch": 0.37, "learning_rate": 4.8163682961723646e-05, "loss": 0.8546, "step": 6885 }, { "epoch": 0.37, "learning_rate": 4.816104768445865e-05, "loss": 0.8804, "step": 6890 }, { "epoch": 0.37, "learning_rate": 4.815841058982139e-05, "loss": 0.9524, "step": 6895 }, { "epoch": 0.37, "learning_rate": 4.815577167801878e-05, "loss": 0.7327, "step": 6900 }, { "epoch": 0.37, "learning_rate": 4.8153130949257884e-05, "loss": 0.9141, "step": 6905 }, { "epoch": 0.37, "learning_rate": 4.8150488403745925e-05, "loss": 0.8751, "step": 6910 }, { "epoch": 0.37, "learning_rate": 4.8147844041690244e-05, "loss": 0.7618, "step": 6915 }, { "epoch": 0.37, "learning_rate": 4.814519786329833e-05, "loss": 0.6011, "step": 6920 }, { "epoch": 0.37, "learning_rate": 4.814254986877784e-05, "loss": 0.7932, "step": 6925 }, { "epoch": 0.37, "learning_rate": 4.813990005833653e-05, "loss": 0.8013, "step": 6930 }, { "epoch": 0.37, "learning_rate": 4.8137248432182334e-05, "loss": 0.9176, "step": 6935 }, { "epoch": 0.37, "learning_rate": 4.813459499052332e-05, "loss": 0.8428, "step": 6940 }, { "epoch": 0.37, "learning_rate": 4.8131939733567686e-05, "loss": 0.8801, "step": 6945 }, { "epoch": 0.37, "learning_rate": 4.812928266152379e-05, "loss": 0.9083, "step": 6950 }, { "epoch": 0.37, "learning_rate": 4.812662377460012e-05, "loss": 0.6987, "step": 6955 }, { "epoch": 0.37, "learning_rate": 4.812396307300531e-05, "loss": 0.8763, "step": 6960 }, { "epoch": 0.37, "learning_rate": 4.812130055694813e-05, "loss": 0.7517, "step": 6965 }, { "epoch": 0.37, "learning_rate": 4.811863622663752e-05, "loss": 0.9541, "step": 6970 }, { "epoch": 0.37, "learning_rate": 4.811597008228251e-05, "loss": 0.6921, "step": 6975 }, { "epoch": 0.37, "learning_rate": 4.811330212409233e-05, "loss": 0.6888, "step": 6980 }, { "epoch": 0.37, "learning_rate": 4.811063235227632e-05, "loss": 0.8537, "step": 6985 }, { "epoch": 0.37, "learning_rate": 4.810796076704396e-05, "loss": 0.7204, "step": 6990 }, { "epoch": 0.37, "learning_rate": 4.810528736860488e-05, "loss": 0.6506, "step": 6995 }, { "epoch": 0.37, "learning_rate": 4.810261215716887e-05, "loss": 0.6871, "step": 7000 }, { "epoch": 0.37, "learning_rate": 4.8099935132945825e-05, "loss": 0.7543, "step": 7005 }, { "epoch": 0.38, "learning_rate": 4.8097256296145816e-05, "loss": 0.8341, "step": 7010 }, { "epoch": 0.38, "learning_rate": 4.8094575646979036e-05, "loss": 0.8501, "step": 7015 }, { "epoch": 0.38, "learning_rate": 4.8091893185655826e-05, "loss": 0.8359, "step": 7020 }, { "epoch": 0.38, "learning_rate": 4.808920891238667e-05, "loss": 0.8277, "step": 7025 }, { "epoch": 0.38, "learning_rate": 4.8086522827382204e-05, "loss": 0.7674, "step": 7030 }, { "epoch": 0.38, "learning_rate": 4.808383493085319e-05, "loss": 0.7716, "step": 7035 }, { "epoch": 0.38, "learning_rate": 4.808114522301054e-05, "loss": 0.7889, "step": 7040 }, { "epoch": 0.38, "learning_rate": 4.8078453704065294e-05, "loss": 0.788, "step": 7045 }, { "epoch": 0.38, "learning_rate": 4.8075760374228665e-05, "loss": 0.7184, "step": 7050 }, { "epoch": 0.38, "learning_rate": 4.807306523371198e-05, "loss": 0.742, "step": 7055 }, { "epoch": 0.38, "learning_rate": 4.807036828272672e-05, "loss": 0.8931, "step": 7060 }, { "epoch": 0.38, "learning_rate": 4.80676695214845e-05, "loss": 0.835, "step": 7065 }, { "epoch": 0.38, "learning_rate": 4.8064968950197095e-05, "loss": 0.8265, "step": 7070 }, { "epoch": 0.38, "learning_rate": 4.8062266569076405e-05, "loss": 0.8526, "step": 7075 }, { "epoch": 0.38, "learning_rate": 4.8059562378334475e-05, "loss": 0.7497, "step": 7080 }, { "epoch": 0.38, "learning_rate": 4.805685637818349e-05, "loss": 0.7826, "step": 7085 }, { "epoch": 0.38, "learning_rate": 4.8054148568835786e-05, "loss": 0.842, "step": 7090 }, { "epoch": 0.38, "learning_rate": 4.8051438950503845e-05, "loss": 0.8569, "step": 7095 }, { "epoch": 0.38, "learning_rate": 4.804872752340027e-05, "loss": 0.887, "step": 7100 }, { "epoch": 0.38, "learning_rate": 4.804601428773782e-05, "loss": 0.7942, "step": 7105 }, { "epoch": 0.38, "learning_rate": 4.80432992437294e-05, "loss": 0.6507, "step": 7110 }, { "epoch": 0.38, "learning_rate": 4.804058239158804e-05, "loss": 0.9072, "step": 7115 }, { "epoch": 0.38, "learning_rate": 4.803786373152692e-05, "loss": 0.6998, "step": 7120 }, { "epoch": 0.38, "learning_rate": 4.8035143263759386e-05, "loss": 0.712, "step": 7125 }, { "epoch": 0.38, "learning_rate": 4.803242098849889e-05, "loss": 0.8511, "step": 7130 }, { "epoch": 0.38, "learning_rate": 4.802969690595903e-05, "loss": 0.8195, "step": 7135 }, { "epoch": 0.38, "learning_rate": 4.8026971016353584e-05, "loss": 0.6801, "step": 7140 }, { "epoch": 0.38, "learning_rate": 4.802424331989641e-05, "loss": 0.8112, "step": 7145 }, { "epoch": 0.38, "learning_rate": 4.8021513816801565e-05, "loss": 0.844, "step": 7150 }, { "epoch": 0.38, "learning_rate": 4.801878250728322e-05, "loss": 0.8558, "step": 7155 }, { "epoch": 0.38, "learning_rate": 4.801604939155569e-05, "loss": 0.7182, "step": 7160 }, { "epoch": 0.38, "learning_rate": 4.801331446983342e-05, "loss": 0.814, "step": 7165 }, { "epoch": 0.38, "learning_rate": 4.801057774233104e-05, "loss": 0.877, "step": 7170 }, { "epoch": 0.38, "learning_rate": 4.8007839209263276e-05, "loss": 0.8308, "step": 7175 }, { "epoch": 0.38, "learning_rate": 4.8005098870845e-05, "loss": 0.7684, "step": 7180 }, { "epoch": 0.38, "learning_rate": 4.800235672729125e-05, "loss": 0.8682, "step": 7185 }, { "epoch": 0.38, "learning_rate": 4.7999612778817196e-05, "loss": 0.9712, "step": 7190 }, { "epoch": 0.38, "learning_rate": 4.799686702563814e-05, "loss": 0.788, "step": 7195 }, { "epoch": 0.39, "learning_rate": 4.799411946796954e-05, "loss": 0.8992, "step": 7200 }, { "epoch": 0.39, "learning_rate": 4.799137010602698e-05, "loss": 0.9255, "step": 7205 }, { "epoch": 0.39, "learning_rate": 4.79886189400262e-05, "loss": 0.6577, "step": 7210 }, { "epoch": 0.39, "learning_rate": 4.798586597018306e-05, "loss": 0.6142, "step": 7215 }, { "epoch": 0.39, "learning_rate": 4.7983111196713605e-05, "loss": 0.7263, "step": 7220 }, { "epoch": 0.39, "learning_rate": 4.798035461983397e-05, "loss": 0.8851, "step": 7225 }, { "epoch": 0.39, "learning_rate": 4.797759623976045e-05, "loss": 0.8105, "step": 7230 }, { "epoch": 0.39, "learning_rate": 4.797483605670951e-05, "loss": 0.8346, "step": 7235 }, { "epoch": 0.39, "learning_rate": 4.797207407089772e-05, "loss": 0.7503, "step": 7240 }, { "epoch": 0.39, "learning_rate": 4.7969310282541794e-05, "loss": 0.8372, "step": 7245 }, { "epoch": 0.39, "learning_rate": 4.7966544691858615e-05, "loss": 0.8527, "step": 7250 }, { "epoch": 0.39, "learning_rate": 4.796377729906518e-05, "loss": 0.8144, "step": 7255 }, { "epoch": 0.39, "learning_rate": 4.796100810437864e-05, "loss": 0.8474, "step": 7260 }, { "epoch": 0.39, "learning_rate": 4.795823710801628e-05, "loss": 0.6952, "step": 7265 }, { "epoch": 0.39, "learning_rate": 4.795546431019555e-05, "loss": 0.9214, "step": 7270 }, { "epoch": 0.39, "learning_rate": 4.7952689711133994e-05, "loss": 0.7239, "step": 7275 }, { "epoch": 0.39, "learning_rate": 4.7949913311049345e-05, "loss": 0.9013, "step": 7280 }, { "epoch": 0.39, "learning_rate": 4.794713511015945e-05, "loss": 0.9118, "step": 7285 }, { "epoch": 0.39, "learning_rate": 4.794435510868231e-05, "loss": 0.7586, "step": 7290 }, { "epoch": 0.39, "learning_rate": 4.794157330683606e-05, "loss": 0.6804, "step": 7295 }, { "epoch": 0.39, "learning_rate": 4.7938789704838984e-05, "loss": 0.8301, "step": 7300 }, { "epoch": 0.39, "learning_rate": 4.7936004302909486e-05, "loss": 0.9335, "step": 7305 }, { "epoch": 0.39, "learning_rate": 4.793321710126615e-05, "loss": 0.8204, "step": 7310 }, { "epoch": 0.39, "learning_rate": 4.7930428100127664e-05, "loss": 0.8582, "step": 7315 }, { "epoch": 0.39, "learning_rate": 4.792763729971288e-05, "loss": 0.7994, "step": 7320 }, { "epoch": 0.39, "learning_rate": 4.792484470024078e-05, "loss": 0.662, "step": 7325 }, { "epoch": 0.39, "learning_rate": 4.792205030193049e-05, "loss": 0.7738, "step": 7330 }, { "epoch": 0.39, "learning_rate": 4.791925410500127e-05, "loss": 0.8079, "step": 7335 }, { "epoch": 0.39, "learning_rate": 4.791645610967254e-05, "loss": 0.71, "step": 7340 }, { "epoch": 0.39, "learning_rate": 4.791365631616384e-05, "loss": 0.6819, "step": 7345 }, { "epoch": 0.39, "learning_rate": 4.7910854724694864e-05, "loss": 0.7996, "step": 7350 }, { "epoch": 0.39, "learning_rate": 4.790805133548545e-05, "loss": 0.8071, "step": 7355 }, { "epoch": 0.39, "learning_rate": 4.790524614875557e-05, "loss": 0.8915, "step": 7360 }, { "epoch": 0.39, "learning_rate": 4.7902439164725334e-05, "loss": 0.9415, "step": 7365 }, { "epoch": 0.39, "learning_rate": 4.789963038361499e-05, "loss": 0.854, "step": 7370 }, { "epoch": 0.39, "learning_rate": 4.789681980564494e-05, "loss": 0.716, "step": 7375 }, { "epoch": 0.39, "learning_rate": 4.7894007431035726e-05, "loss": 0.7333, "step": 7380 }, { "epoch": 0.4, "learning_rate": 4.7891193260008026e-05, "loss": 0.7083, "step": 7385 }, { "epoch": 0.4, "learning_rate": 4.788837729278265e-05, "loss": 0.7627, "step": 7390 }, { "epoch": 0.4, "learning_rate": 4.788555952958056e-05, "loss": 0.9727, "step": 7395 }, { "epoch": 0.4, "learning_rate": 4.788273997062286e-05, "loss": 0.9049, "step": 7400 }, { "epoch": 0.4, "learning_rate": 4.7879918616130795e-05, "loss": 0.9316, "step": 7405 }, { "epoch": 0.4, "learning_rate": 4.7877095466325745e-05, "loss": 0.8453, "step": 7410 }, { "epoch": 0.4, "learning_rate": 4.787427052142923e-05, "loss": 0.8353, "step": 7415 }, { "epoch": 0.4, "learning_rate": 4.787144378166291e-05, "loss": 0.8048, "step": 7420 }, { "epoch": 0.4, "learning_rate": 4.7868615247248604e-05, "loss": 0.8047, "step": 7425 }, { "epoch": 0.4, "learning_rate": 4.786578491840825e-05, "loss": 0.7862, "step": 7430 }, { "epoch": 0.4, "learning_rate": 4.7862952795363925e-05, "loss": 0.7956, "step": 7435 }, { "epoch": 0.4, "learning_rate": 4.786011887833788e-05, "loss": 0.7657, "step": 7440 }, { "epoch": 0.4, "learning_rate": 4.7857283167552455e-05, "loss": 0.8208, "step": 7445 }, { "epoch": 0.4, "learning_rate": 4.785444566323018e-05, "loss": 0.9138, "step": 7450 }, { "epoch": 0.4, "learning_rate": 4.78516063655937e-05, "loss": 0.7994, "step": 7455 }, { "epoch": 0.4, "learning_rate": 4.78487652748658e-05, "loss": 0.7435, "step": 7460 }, { "epoch": 0.4, "learning_rate": 4.7845922391269423e-05, "loss": 0.7738, "step": 7465 }, { "epoch": 0.4, "learning_rate": 4.7843077715027626e-05, "loss": 0.7367, "step": 7470 }, { "epoch": 0.4, "learning_rate": 4.7840231246363634e-05, "loss": 0.831, "step": 7475 }, { "epoch": 0.4, "learning_rate": 4.783738298550079e-05, "loss": 0.7333, "step": 7480 }, { "epoch": 0.4, "learning_rate": 4.783453293266259e-05, "loss": 0.8165, "step": 7485 }, { "epoch": 0.4, "learning_rate": 4.7831681088072676e-05, "loss": 0.7677, "step": 7490 }, { "epoch": 0.4, "learning_rate": 4.782882745195482e-05, "loss": 0.72, "step": 7495 }, { "epoch": 0.4, "learning_rate": 4.782597202453293e-05, "loss": 0.6487, "step": 7500 }, { "epoch": 0.4, "learning_rate": 4.782311480603107e-05, "loss": 0.8733, "step": 7505 }, { "epoch": 0.4, "learning_rate": 4.782025579667343e-05, "loss": 0.7825, "step": 7510 }, { "epoch": 0.4, "learning_rate": 4.7817394996684354e-05, "loss": 0.7005, "step": 7515 }, { "epoch": 0.4, "learning_rate": 4.781453240628831e-05, "loss": 0.7942, "step": 7520 }, { "epoch": 0.4, "learning_rate": 4.781166802570994e-05, "loss": 0.8163, "step": 7525 }, { "epoch": 0.4, "learning_rate": 4.7808801855173976e-05, "loss": 0.9127, "step": 7530 }, { "epoch": 0.4, "learning_rate": 4.7805933894905324e-05, "loss": 0.8118, "step": 7535 }, { "epoch": 0.4, "learning_rate": 4.780306414512903e-05, "loss": 0.7944, "step": 7540 }, { "epoch": 0.4, "learning_rate": 4.780019260607027e-05, "loss": 0.7226, "step": 7545 }, { "epoch": 0.4, "learning_rate": 4.7797319277954366e-05, "loss": 0.8094, "step": 7550 }, { "epoch": 0.4, "learning_rate": 4.779444416100677e-05, "loss": 0.7522, "step": 7555 }, { "epoch": 0.4, "learning_rate": 4.7791567255453104e-05, "loss": 0.8632, "step": 7560 }, { "epoch": 0.4, "learning_rate": 4.7788688561519085e-05, "loss": 0.6781, "step": 7565 }, { "epoch": 0.4, "learning_rate": 4.778580807943061e-05, "loss": 0.8259, "step": 7570 }, { "epoch": 0.41, "learning_rate": 4.7782925809413695e-05, "loss": 0.6863, "step": 7575 }, { "epoch": 0.41, "learning_rate": 4.77800417516945e-05, "loss": 0.8084, "step": 7580 }, { "epoch": 0.41, "learning_rate": 4.777715590649935e-05, "loss": 0.7288, "step": 7585 }, { "epoch": 0.41, "learning_rate": 4.777426827405465e-05, "loss": 0.7919, "step": 7590 }, { "epoch": 0.41, "learning_rate": 4.777137885458701e-05, "loss": 0.7569, "step": 7595 }, { "epoch": 0.41, "learning_rate": 4.7768487648323144e-05, "loss": 0.7948, "step": 7600 }, { "epoch": 0.41, "learning_rate": 4.776559465548992e-05, "loss": 0.8371, "step": 7605 }, { "epoch": 0.41, "learning_rate": 4.776269987631434e-05, "loss": 0.7119, "step": 7610 }, { "epoch": 0.41, "learning_rate": 4.775980331102356e-05, "loss": 0.7255, "step": 7615 }, { "epoch": 0.41, "learning_rate": 4.7756904959844835e-05, "loss": 0.7239, "step": 7620 }, { "epoch": 0.41, "learning_rate": 4.775400482300561e-05, "loss": 0.8782, "step": 7625 }, { "epoch": 0.41, "learning_rate": 4.775110290073344e-05, "loss": 0.9146, "step": 7630 }, { "epoch": 0.41, "learning_rate": 4.774819919325605e-05, "loss": 0.7668, "step": 7635 }, { "epoch": 0.41, "learning_rate": 4.774529370080125e-05, "loss": 0.9005, "step": 7640 }, { "epoch": 0.41, "learning_rate": 4.7742386423597064e-05, "loss": 0.8589, "step": 7645 }, { "epoch": 0.41, "learning_rate": 4.773947736187158e-05, "loss": 0.7268, "step": 7650 }, { "epoch": 0.41, "learning_rate": 4.773656651585309e-05, "loss": 0.806, "step": 7655 }, { "epoch": 0.41, "learning_rate": 4.773365388576998e-05, "loss": 0.8754, "step": 7660 }, { "epoch": 0.41, "learning_rate": 4.77307394718508e-05, "loss": 0.6792, "step": 7665 }, { "epoch": 0.41, "learning_rate": 4.772782327432425e-05, "loss": 0.7556, "step": 7670 }, { "epoch": 0.41, "learning_rate": 4.772490529341913e-05, "loss": 0.8999, "step": 7675 }, { "epoch": 0.41, "learning_rate": 4.772198552936442e-05, "loss": 0.7984, "step": 7680 }, { "epoch": 0.41, "learning_rate": 4.771906398238922e-05, "loss": 0.6748, "step": 7685 }, { "epoch": 0.41, "learning_rate": 4.7716140652722774e-05, "loss": 0.9188, "step": 7690 }, { "epoch": 0.41, "learning_rate": 4.771321554059446e-05, "loss": 0.7875, "step": 7695 }, { "epoch": 0.41, "learning_rate": 4.771028864623382e-05, "loss": 0.7906, "step": 7700 }, { "epoch": 0.41, "learning_rate": 4.77073599698705e-05, "loss": 0.8845, "step": 7705 }, { "epoch": 0.41, "learning_rate": 4.7704429511734314e-05, "loss": 0.793, "step": 7710 }, { "epoch": 0.41, "learning_rate": 4.770149727205521e-05, "loss": 0.7802, "step": 7715 }, { "epoch": 0.41, "learning_rate": 4.769856325106325e-05, "loss": 0.9725, "step": 7720 }, { "epoch": 0.41, "learning_rate": 4.769562744898868e-05, "loss": 0.8408, "step": 7725 }, { "epoch": 0.41, "learning_rate": 4.769268986606185e-05, "loss": 0.8148, "step": 7730 }, { "epoch": 0.41, "learning_rate": 4.7689750502513255e-05, "loss": 0.9142, "step": 7735 }, { "epoch": 0.41, "learning_rate": 4.768680935857356e-05, "loss": 0.8864, "step": 7740 }, { "epoch": 0.41, "learning_rate": 4.7683866434473536e-05, "loss": 0.7944, "step": 7745 }, { "epoch": 0.41, "learning_rate": 4.76809217304441e-05, "loss": 0.8669, "step": 7750 }, { "epoch": 0.41, "learning_rate": 4.767797524671632e-05, "loss": 0.8018, "step": 7755 }, { "epoch": 0.42, "learning_rate": 4.7675026983521395e-05, "loss": 0.7345, "step": 7760 }, { "epoch": 0.42, "learning_rate": 4.767207694109066e-05, "loss": 0.6435, "step": 7765 }, { "epoch": 0.42, "learning_rate": 4.7669125119655604e-05, "loss": 0.7386, "step": 7770 }, { "epoch": 0.42, "learning_rate": 4.766617151944784e-05, "loss": 0.7342, "step": 7775 }, { "epoch": 0.42, "learning_rate": 4.7663216140699144e-05, "loss": 0.8328, "step": 7780 }, { "epoch": 0.42, "learning_rate": 4.7660258983641384e-05, "loss": 0.8689, "step": 7785 }, { "epoch": 0.42, "learning_rate": 4.7657300048506624e-05, "loss": 0.9127, "step": 7790 }, { "epoch": 0.42, "learning_rate": 4.765433933552703e-05, "loss": 0.8793, "step": 7795 }, { "epoch": 0.42, "learning_rate": 4.765137684493493e-05, "loss": 0.8439, "step": 7800 }, { "epoch": 0.42, "learning_rate": 4.7648412576962776e-05, "loss": 0.6499, "step": 7805 }, { "epoch": 0.42, "learning_rate": 4.764544653184316e-05, "loss": 0.7619, "step": 7810 }, { "epoch": 0.42, "learning_rate": 4.764247870980882e-05, "loss": 0.9041, "step": 7815 }, { "epoch": 0.42, "learning_rate": 4.763950911109263e-05, "loss": 0.7256, "step": 7820 }, { "epoch": 0.42, "learning_rate": 4.7636537735927613e-05, "loss": 0.8597, "step": 7825 }, { "epoch": 0.42, "learning_rate": 4.7633564584546916e-05, "loss": 0.8381, "step": 7830 }, { "epoch": 0.42, "learning_rate": 4.7630589657183835e-05, "loss": 0.8253, "step": 7835 }, { "epoch": 0.42, "learning_rate": 4.76276129540718e-05, "loss": 0.723, "step": 7840 }, { "epoch": 0.42, "learning_rate": 4.762463447544438e-05, "loss": 0.8066, "step": 7845 }, { "epoch": 0.42, "learning_rate": 4.7621654221535293e-05, "loss": 0.809, "step": 7850 }, { "epoch": 0.42, "learning_rate": 4.76186721925784e-05, "loss": 0.7983, "step": 7855 }, { "epoch": 0.42, "learning_rate": 4.7615688388807665e-05, "loss": 0.7596, "step": 7860 }, { "epoch": 0.42, "learning_rate": 4.761270281045724e-05, "loss": 0.9377, "step": 7865 }, { "epoch": 0.42, "learning_rate": 4.7609715457761386e-05, "loss": 0.7259, "step": 7870 }, { "epoch": 0.42, "learning_rate": 4.760672633095451e-05, "loss": 0.8541, "step": 7875 }, { "epoch": 0.42, "learning_rate": 4.760373543027116e-05, "loss": 0.9423, "step": 7880 }, { "epoch": 0.42, "learning_rate": 4.760074275594602e-05, "loss": 0.7212, "step": 7885 }, { "epoch": 0.42, "learning_rate": 4.759774830821392e-05, "loss": 0.8465, "step": 7890 }, { "epoch": 0.42, "learning_rate": 4.759475208730982e-05, "loss": 0.987, "step": 7895 }, { "epoch": 0.42, "learning_rate": 4.7591754093468834e-05, "loss": 0.7307, "step": 7900 }, { "epoch": 0.42, "learning_rate": 4.7588754326926184e-05, "loss": 0.8346, "step": 7905 }, { "epoch": 0.42, "learning_rate": 4.758575278791727e-05, "loss": 0.7288, "step": 7910 }, { "epoch": 0.42, "learning_rate": 4.7582749476677605e-05, "loss": 0.7893, "step": 7915 }, { "epoch": 0.42, "learning_rate": 4.7579744393442855e-05, "loss": 0.8403, "step": 7920 }, { "epoch": 0.42, "learning_rate": 4.757673753844881e-05, "loss": 0.7984, "step": 7925 }, { "epoch": 0.42, "learning_rate": 4.7573728911931424e-05, "loss": 0.8746, "step": 7930 }, { "epoch": 0.42, "learning_rate": 4.7570718514126764e-05, "loss": 0.8429, "step": 7935 }, { "epoch": 0.42, "learning_rate": 4.756770634527104e-05, "loss": 0.8346, "step": 7940 }, { "epoch": 0.43, "learning_rate": 4.7564692405600616e-05, "loss": 0.8847, "step": 7945 }, { "epoch": 0.43, "learning_rate": 4.7561676695351976e-05, "loss": 0.6616, "step": 7950 }, { "epoch": 0.43, "learning_rate": 4.755865921476177e-05, "loss": 0.804, "step": 7955 }, { "epoch": 0.43, "learning_rate": 4.7555639964066754e-05, "loss": 0.7363, "step": 7960 }, { "epoch": 0.43, "learning_rate": 4.755261894350385e-05, "loss": 0.8548, "step": 7965 }, { "epoch": 0.43, "learning_rate": 4.75495961533101e-05, "loss": 0.8168, "step": 7970 }, { "epoch": 0.43, "learning_rate": 4.75465715937227e-05, "loss": 0.6345, "step": 7975 }, { "epoch": 0.43, "learning_rate": 4.754354526497897e-05, "loss": 0.9102, "step": 7980 }, { "epoch": 0.43, "learning_rate": 4.754051716731638e-05, "loss": 0.7096, "step": 7985 }, { "epoch": 0.43, "learning_rate": 4.7537487300972525e-05, "loss": 0.7356, "step": 7990 }, { "epoch": 0.43, "learning_rate": 4.7534455666185166e-05, "loss": 0.688, "step": 7995 }, { "epoch": 0.43, "learning_rate": 4.753142226319218e-05, "loss": 0.689, "step": 8000 }, { "epoch": 0.43, "learning_rate": 4.752838709223158e-05, "loss": 0.7881, "step": 8005 }, { "epoch": 0.43, "learning_rate": 4.7525350153541534e-05, "loss": 0.9296, "step": 8010 }, { "epoch": 0.43, "learning_rate": 4.7522311447360343e-05, "loss": 0.8627, "step": 8015 }, { "epoch": 0.43, "learning_rate": 4.7519270973926433e-05, "loss": 0.793, "step": 8020 }, { "epoch": 0.43, "learning_rate": 4.751622873347838e-05, "loss": 0.7584, "step": 8025 }, { "epoch": 0.43, "learning_rate": 4.751318472625492e-05, "loss": 0.8536, "step": 8030 }, { "epoch": 0.43, "learning_rate": 4.751013895249489e-05, "loss": 0.8413, "step": 8035 }, { "epoch": 0.43, "learning_rate": 4.7507091412437276e-05, "loss": 0.8454, "step": 8040 }, { "epoch": 0.43, "learning_rate": 4.750404210632122e-05, "loss": 0.7701, "step": 8045 }, { "epoch": 0.43, "learning_rate": 4.750099103438599e-05, "loss": 0.8004, "step": 8050 }, { "epoch": 0.43, "learning_rate": 4.7497938196871e-05, "loss": 0.9889, "step": 8055 }, { "epoch": 0.43, "learning_rate": 4.749488359401577e-05, "loss": 0.7515, "step": 8060 }, { "epoch": 0.43, "learning_rate": 4.7491827226060014e-05, "loss": 0.9324, "step": 8065 }, { "epoch": 0.43, "learning_rate": 4.7488769093243535e-05, "loss": 0.8818, "step": 8070 }, { "epoch": 0.43, "learning_rate": 4.748570919580631e-05, "loss": 0.7374, "step": 8075 }, { "epoch": 0.43, "learning_rate": 4.7482647533988435e-05, "loss": 0.9266, "step": 8080 }, { "epoch": 0.43, "learning_rate": 4.7479584108030136e-05, "loss": 0.7565, "step": 8085 }, { "epoch": 0.43, "learning_rate": 4.747651891817181e-05, "loss": 0.8086, "step": 8090 }, { "epoch": 0.43, "learning_rate": 4.747345196465396e-05, "loss": 0.9391, "step": 8095 }, { "epoch": 0.43, "learning_rate": 4.7470383247717244e-05, "loss": 0.6459, "step": 8100 }, { "epoch": 0.43, "learning_rate": 4.746731276760245e-05, "loss": 0.7412, "step": 8105 }, { "epoch": 0.43, "learning_rate": 4.746424052455052e-05, "loss": 0.7047, "step": 8110 }, { "epoch": 0.43, "learning_rate": 4.74611665188025e-05, "loss": 0.7226, "step": 8115 }, { "epoch": 0.43, "learning_rate": 4.745809075059963e-05, "loss": 0.8196, "step": 8120 }, { "epoch": 0.43, "learning_rate": 4.745501322018322e-05, "loss": 0.5435, "step": 8125 }, { "epoch": 0.43, "learning_rate": 4.7451933927794777e-05, "loss": 0.8914, "step": 8130 }, { "epoch": 0.44, "learning_rate": 4.744885287367592e-05, "loss": 0.8608, "step": 8135 }, { "epoch": 0.44, "learning_rate": 4.744577005806841e-05, "loss": 0.859, "step": 8140 }, { "epoch": 0.44, "learning_rate": 4.744268548121414e-05, "loss": 0.8198, "step": 8145 }, { "epoch": 0.44, "learning_rate": 4.743959914335515e-05, "loss": 0.882, "step": 8150 }, { "epoch": 0.44, "learning_rate": 4.743651104473361e-05, "loss": 0.735, "step": 8155 }, { "epoch": 0.44, "learning_rate": 4.743342118559184e-05, "loss": 0.6242, "step": 8160 }, { "epoch": 0.44, "learning_rate": 4.7430329566172284e-05, "loss": 0.7974, "step": 8165 }, { "epoch": 0.44, "learning_rate": 4.742723618671754e-05, "loss": 0.7568, "step": 8170 }, { "epoch": 0.44, "learning_rate": 4.7424141047470326e-05, "loss": 0.8233, "step": 8175 }, { "epoch": 0.44, "learning_rate": 4.742104414867352e-05, "loss": 0.7301, "step": 8180 }, { "epoch": 0.44, "learning_rate": 4.741794549057012e-05, "loss": 0.7417, "step": 8185 }, { "epoch": 0.44, "learning_rate": 4.741484507340326e-05, "loss": 0.8588, "step": 8190 }, { "epoch": 0.44, "learning_rate": 4.741174289741622e-05, "loss": 0.633, "step": 8195 }, { "epoch": 0.44, "learning_rate": 4.740863896285243e-05, "loss": 0.7923, "step": 8200 }, { "epoch": 0.44, "learning_rate": 4.740553326995544e-05, "loss": 0.8328, "step": 8205 }, { "epoch": 0.44, "learning_rate": 4.740242581896894e-05, "loss": 0.6267, "step": 8210 }, { "epoch": 0.44, "learning_rate": 4.739931661013676e-05, "loss": 0.8507, "step": 8215 }, { "epoch": 0.44, "learning_rate": 4.739620564370288e-05, "loss": 0.8477, "step": 8220 }, { "epoch": 0.44, "learning_rate": 4.7393092919911394e-05, "loss": 0.8198, "step": 8225 }, { "epoch": 0.44, "learning_rate": 4.738997843900656e-05, "loss": 0.7434, "step": 8230 }, { "epoch": 0.44, "learning_rate": 4.738686220123276e-05, "loss": 0.8423, "step": 8235 }, { "epoch": 0.44, "learning_rate": 4.738374420683451e-05, "loss": 0.7117, "step": 8240 }, { "epoch": 0.44, "learning_rate": 4.738062445605646e-05, "loss": 0.7681, "step": 8245 }, { "epoch": 0.44, "learning_rate": 4.7377502949143423e-05, "loss": 0.7729, "step": 8250 }, { "epoch": 0.44, "learning_rate": 4.737437968634033e-05, "loss": 0.8329, "step": 8255 }, { "epoch": 0.44, "learning_rate": 4.737125466789224e-05, "loss": 0.8426, "step": 8260 }, { "epoch": 0.44, "learning_rate": 4.736812789404438e-05, "loss": 0.6936, "step": 8265 }, { "epoch": 0.44, "learning_rate": 4.736499936504209e-05, "loss": 0.763, "step": 8270 }, { "epoch": 0.44, "learning_rate": 4.736186908113086e-05, "loss": 0.9509, "step": 8275 }, { "epoch": 0.44, "learning_rate": 4.73587370425563e-05, "loss": 0.823, "step": 8280 }, { "epoch": 0.44, "learning_rate": 4.735560324956419e-05, "loss": 0.9261, "step": 8285 }, { "epoch": 0.44, "learning_rate": 4.735246770240042e-05, "loss": 0.7226, "step": 8290 }, { "epoch": 0.44, "learning_rate": 4.734933040131102e-05, "loss": 0.8563, "step": 8295 }, { "epoch": 0.44, "learning_rate": 4.734619134654217e-05, "loss": 0.8009, "step": 8300 }, { "epoch": 0.44, "learning_rate": 4.7343050538340186e-05, "loss": 0.8793, "step": 8305 }, { "epoch": 0.44, "learning_rate": 4.73399079769515e-05, "loss": 0.8894, "step": 8310 }, { "epoch": 0.44, "learning_rate": 4.733676366262272e-05, "loss": 0.8194, "step": 8315 }, { "epoch": 0.45, "learning_rate": 4.733361759560055e-05, "loss": 0.8656, "step": 8320 }, { "epoch": 0.45, "learning_rate": 4.7330469776131884e-05, "loss": 0.85, "step": 8325 }, { "epoch": 0.45, "learning_rate": 4.7327320204463675e-05, "loss": 0.8959, "step": 8330 }, { "epoch": 0.45, "learning_rate": 4.73241688808431e-05, "loss": 0.8641, "step": 8335 }, { "epoch": 0.45, "learning_rate": 4.732101580551741e-05, "loss": 0.7719, "step": 8340 }, { "epoch": 0.45, "learning_rate": 4.731786097873403e-05, "loss": 0.8282, "step": 8345 }, { "epoch": 0.45, "learning_rate": 4.73147044007405e-05, "loss": 0.8455, "step": 8350 }, { "epoch": 0.45, "learning_rate": 4.7311546071784506e-05, "loss": 0.7587, "step": 8355 }, { "epoch": 0.45, "learning_rate": 4.730838599211388e-05, "loss": 0.7136, "step": 8360 }, { "epoch": 0.45, "learning_rate": 4.730522416197657e-05, "loss": 0.79, "step": 8365 }, { "epoch": 0.45, "learning_rate": 4.730206058162069e-05, "loss": 0.8731, "step": 8370 }, { "epoch": 0.45, "learning_rate": 4.7298895251294464e-05, "loss": 0.817, "step": 8375 }, { "epoch": 0.45, "learning_rate": 4.729572817124627e-05, "loss": 0.8334, "step": 8380 }, { "epoch": 0.45, "learning_rate": 4.729255934172462e-05, "loss": 0.7309, "step": 8385 }, { "epoch": 0.45, "learning_rate": 4.728938876297816e-05, "loss": 0.7559, "step": 8390 }, { "epoch": 0.45, "learning_rate": 4.7286216435255685e-05, "loss": 0.7146, "step": 8395 }, { "epoch": 0.45, "learning_rate": 4.728304235880609e-05, "loss": 0.8805, "step": 8400 }, { "epoch": 0.45, "learning_rate": 4.727986653387846e-05, "loss": 0.9933, "step": 8405 }, { "epoch": 0.45, "learning_rate": 4.727668896072199e-05, "loss": 0.6786, "step": 8410 }, { "epoch": 0.45, "learning_rate": 4.7273509639586e-05, "loss": 0.8467, "step": 8415 }, { "epoch": 0.45, "learning_rate": 4.727032857071997e-05, "loss": 0.8853, "step": 8420 }, { "epoch": 0.45, "learning_rate": 4.72671457543735e-05, "loss": 0.8622, "step": 8425 }, { "epoch": 0.45, "learning_rate": 4.7263961190796353e-05, "loss": 0.6195, "step": 8430 }, { "epoch": 0.45, "learning_rate": 4.7260774880238396e-05, "loss": 0.8503, "step": 8435 }, { "epoch": 0.45, "learning_rate": 4.7257586822949654e-05, "loss": 0.7259, "step": 8440 }, { "epoch": 0.45, "learning_rate": 4.725439701918028e-05, "loss": 0.737, "step": 8445 }, { "epoch": 0.45, "learning_rate": 4.725120546918057e-05, "loss": 0.8469, "step": 8450 }, { "epoch": 0.45, "learning_rate": 4.724801217320095e-05, "loss": 0.7303, "step": 8455 }, { "epoch": 0.45, "learning_rate": 4.7244817131492004e-05, "loss": 0.7396, "step": 8460 }, { "epoch": 0.45, "learning_rate": 4.724162034430442e-05, "loss": 0.946, "step": 8465 }, { "epoch": 0.45, "learning_rate": 4.723842181188903e-05, "loss": 0.7625, "step": 8470 }, { "epoch": 0.45, "learning_rate": 4.723522153449684e-05, "loss": 0.8024, "step": 8475 }, { "epoch": 0.45, "learning_rate": 4.7232019512378954e-05, "loss": 0.739, "step": 8480 }, { "epoch": 0.45, "learning_rate": 4.7228815745786616e-05, "loss": 0.6964, "step": 8485 }, { "epoch": 0.45, "learning_rate": 4.722561023497123e-05, "loss": 0.8487, "step": 8490 }, { "epoch": 0.45, "learning_rate": 4.72224029801843e-05, "loss": 0.8217, "step": 8495 }, { "epoch": 0.45, "learning_rate": 4.721919398167751e-05, "loss": 0.8176, "step": 8500 }, { "epoch": 0.45, "learning_rate": 4.721598323970265e-05, "loss": 0.7314, "step": 8505 }, { "epoch": 0.46, "learning_rate": 4.721277075451166e-05, "loss": 0.9069, "step": 8510 }, { "epoch": 0.46, "learning_rate": 4.7209556526356616e-05, "loss": 0.8434, "step": 8515 }, { "epoch": 0.46, "learning_rate": 4.7206340555489714e-05, "loss": 0.6144, "step": 8520 }, { "epoch": 0.46, "learning_rate": 4.7203122842163316e-05, "loss": 0.8412, "step": 8525 }, { "epoch": 0.46, "learning_rate": 4.719990338662989e-05, "loss": 0.6959, "step": 8530 }, { "epoch": 0.46, "learning_rate": 4.7196682189142083e-05, "loss": 0.6124, "step": 8535 }, { "epoch": 0.46, "learning_rate": 4.719345924995263e-05, "loss": 0.8663, "step": 8540 }, { "epoch": 0.46, "learning_rate": 4.7190234569314426e-05, "loss": 0.8179, "step": 8545 }, { "epoch": 0.46, "learning_rate": 4.718700814748051e-05, "loss": 0.7653, "step": 8550 }, { "epoch": 0.46, "learning_rate": 4.7183779984704034e-05, "loss": 0.8646, "step": 8555 }, { "epoch": 0.46, "learning_rate": 4.718055008123832e-05, "loss": 0.7132, "step": 8560 }, { "epoch": 0.46, "learning_rate": 4.71773184373368e-05, "loss": 0.8337, "step": 8565 }, { "epoch": 0.46, "learning_rate": 4.717408505325305e-05, "loss": 0.6969, "step": 8570 }, { "epoch": 0.46, "learning_rate": 4.717084992924078e-05, "loss": 0.7786, "step": 8575 }, { "epoch": 0.46, "learning_rate": 4.716761306555384e-05, "loss": 0.863, "step": 8580 }, { "epoch": 0.46, "learning_rate": 4.7164374462446224e-05, "loss": 0.8722, "step": 8585 }, { "epoch": 0.46, "learning_rate": 4.7161134120172045e-05, "loss": 0.8318, "step": 8590 }, { "epoch": 0.46, "learning_rate": 4.7157892038985575e-05, "loss": 0.7558, "step": 8595 }, { "epoch": 0.46, "learning_rate": 4.7154648219141195e-05, "loss": 0.7415, "step": 8600 }, { "epoch": 0.46, "learning_rate": 4.715140266089345e-05, "loss": 0.645, "step": 8605 }, { "epoch": 0.46, "learning_rate": 4.7148155364496994e-05, "loss": 0.8102, "step": 8610 }, { "epoch": 0.46, "learning_rate": 4.714490633020664e-05, "loss": 0.8066, "step": 8615 }, { "epoch": 0.46, "learning_rate": 4.7141655558277335e-05, "loss": 0.8735, "step": 8620 }, { "epoch": 0.46, "learning_rate": 4.713840304896415e-05, "loss": 0.7703, "step": 8625 }, { "epoch": 0.46, "learning_rate": 4.7135148802522294e-05, "loss": 0.7764, "step": 8630 }, { "epoch": 0.46, "learning_rate": 4.713189281920712e-05, "loss": 0.771, "step": 8635 }, { "epoch": 0.46, "learning_rate": 4.712863509927413e-05, "loss": 0.6983, "step": 8640 }, { "epoch": 0.46, "learning_rate": 4.712537564297892e-05, "loss": 0.6602, "step": 8645 }, { "epoch": 0.46, "learning_rate": 4.712211445057727e-05, "loss": 0.855, "step": 8650 }, { "epoch": 0.46, "learning_rate": 4.711885152232507e-05, "loss": 0.8558, "step": 8655 }, { "epoch": 0.46, "learning_rate": 4.711558685847834e-05, "loss": 0.838, "step": 8660 }, { "epoch": 0.46, "learning_rate": 4.711232045929327e-05, "loss": 0.7582, "step": 8665 }, { "epoch": 0.46, "learning_rate": 4.710905232502614e-05, "loss": 0.7527, "step": 8670 }, { "epoch": 0.46, "learning_rate": 4.71057824559334e-05, "loss": 0.7696, "step": 8675 }, { "epoch": 0.46, "learning_rate": 4.710251085227163e-05, "loss": 0.7761, "step": 8680 }, { "epoch": 0.46, "learning_rate": 4.709923751429755e-05, "loss": 0.8355, "step": 8685 }, { "epoch": 0.46, "learning_rate": 4.7095962442267974e-05, "loss": 0.7672, "step": 8690 }, { "epoch": 0.47, "learning_rate": 4.7092685636439926e-05, "loss": 0.8688, "step": 8695 }, { "epoch": 0.47, "learning_rate": 4.708940709707051e-05, "loss": 0.6361, "step": 8700 }, { "epoch": 0.47, "learning_rate": 4.7086126824416965e-05, "loss": 0.7768, "step": 8705 }, { "epoch": 0.47, "learning_rate": 4.708284481873672e-05, "loss": 0.7763, "step": 8710 }, { "epoch": 0.47, "learning_rate": 4.7079561080287274e-05, "loss": 0.7752, "step": 8715 }, { "epoch": 0.47, "learning_rate": 4.707627560932629e-05, "loss": 0.8171, "step": 8720 }, { "epoch": 0.47, "learning_rate": 4.7072988406111595e-05, "loss": 0.8556, "step": 8725 }, { "epoch": 0.47, "learning_rate": 4.70696994709011e-05, "loss": 0.6163, "step": 8730 }, { "epoch": 0.47, "learning_rate": 4.706640880395289e-05, "loss": 0.8174, "step": 8735 }, { "epoch": 0.47, "learning_rate": 4.706311640552517e-05, "loss": 0.7525, "step": 8740 }, { "epoch": 0.47, "learning_rate": 4.7059822275876286e-05, "loss": 0.7923, "step": 8745 }, { "epoch": 0.47, "learning_rate": 4.705652641526471e-05, "loss": 0.7434, "step": 8750 }, { "epoch": 0.47, "learning_rate": 4.705322882394906e-05, "loss": 0.802, "step": 8755 }, { "epoch": 0.47, "learning_rate": 4.704992950218809e-05, "loss": 0.8143, "step": 8760 }, { "epoch": 0.47, "learning_rate": 4.7046628450240696e-05, "loss": 0.8047, "step": 8765 }, { "epoch": 0.47, "learning_rate": 4.704332566836588e-05, "loss": 0.6798, "step": 8770 }, { "epoch": 0.47, "learning_rate": 4.704002115682281e-05, "loss": 0.6715, "step": 8775 }, { "epoch": 0.47, "learning_rate": 4.7036714915870795e-05, "loss": 0.6971, "step": 8780 }, { "epoch": 0.47, "learning_rate": 4.7033406945769246e-05, "loss": 0.8731, "step": 8785 }, { "epoch": 0.47, "learning_rate": 4.703009724677773e-05, "loss": 0.6715, "step": 8790 }, { "epoch": 0.47, "learning_rate": 4.702678581915596e-05, "loss": 0.8188, "step": 8795 }, { "epoch": 0.47, "learning_rate": 4.702347266316376e-05, "loss": 0.6578, "step": 8800 }, { "epoch": 0.47, "learning_rate": 4.702015777906111e-05, "loss": 0.7778, "step": 8805 }, { "epoch": 0.47, "learning_rate": 4.701684116710813e-05, "loss": 0.7524, "step": 8810 }, { "epoch": 0.47, "learning_rate": 4.701352282756504e-05, "loss": 0.9254, "step": 8815 }, { "epoch": 0.47, "learning_rate": 4.701020276069222e-05, "loss": 0.8136, "step": 8820 }, { "epoch": 0.47, "learning_rate": 4.700688096675021e-05, "loss": 0.8761, "step": 8825 }, { "epoch": 0.47, "learning_rate": 4.700355744599965e-05, "loss": 0.7901, "step": 8830 }, { "epoch": 0.47, "learning_rate": 4.700023219870131e-05, "loss": 0.8181, "step": 8835 }, { "epoch": 0.47, "learning_rate": 4.699690522511612e-05, "loss": 0.7215, "step": 8840 }, { "epoch": 0.47, "learning_rate": 4.699357652550515e-05, "loss": 0.6972, "step": 8845 }, { "epoch": 0.47, "learning_rate": 4.6990246100129576e-05, "loss": 0.6351, "step": 8850 }, { "epoch": 0.47, "learning_rate": 4.698691394925073e-05, "loss": 0.7755, "step": 8855 }, { "epoch": 0.47, "learning_rate": 4.698358007313008e-05, "loss": 0.7657, "step": 8860 }, { "epoch": 0.47, "learning_rate": 4.698024447202922e-05, "loss": 0.7957, "step": 8865 }, { "epoch": 0.47, "learning_rate": 4.697690714620988e-05, "loss": 0.8218, "step": 8870 }, { "epoch": 0.47, "learning_rate": 4.697356809593394e-05, "loss": 0.8059, "step": 8875 }, { "epoch": 0.48, "learning_rate": 4.6970227321463396e-05, "loss": 0.8861, "step": 8880 }, { "epoch": 0.48, "learning_rate": 4.696688482306039e-05, "loss": 0.6951, "step": 8885 }, { "epoch": 0.48, "learning_rate": 4.69635406009872e-05, "loss": 0.7081, "step": 8890 }, { "epoch": 0.48, "learning_rate": 4.696019465550623e-05, "loss": 0.7778, "step": 8895 }, { "epoch": 0.48, "learning_rate": 4.6956846986880035e-05, "loss": 0.854, "step": 8900 }, { "epoch": 0.48, "learning_rate": 4.6953497595371275e-05, "loss": 0.7726, "step": 8905 }, { "epoch": 0.48, "learning_rate": 4.6950146481242794e-05, "loss": 0.7738, "step": 8910 }, { "epoch": 0.48, "learning_rate": 4.6946793644757526e-05, "loss": 0.7351, "step": 8915 }, { "epoch": 0.48, "learning_rate": 4.694343908617855e-05, "loss": 0.7159, "step": 8920 }, { "epoch": 0.48, "learning_rate": 4.694008280576911e-05, "loss": 0.8371, "step": 8925 }, { "epoch": 0.48, "learning_rate": 4.693672480379255e-05, "loss": 0.7943, "step": 8930 }, { "epoch": 0.48, "learning_rate": 4.693336508051236e-05, "loss": 0.7223, "step": 8935 }, { "epoch": 0.48, "learning_rate": 4.693000363619218e-05, "loss": 0.6927, "step": 8940 }, { "epoch": 0.48, "learning_rate": 4.692664047109574e-05, "loss": 0.824, "step": 8945 }, { "epoch": 0.48, "learning_rate": 4.692327558548697e-05, "loss": 0.8323, "step": 8950 }, { "epoch": 0.48, "learning_rate": 4.6919908979629877e-05, "loss": 0.7352, "step": 8955 }, { "epoch": 0.48, "learning_rate": 4.691654065378865e-05, "loss": 0.7701, "step": 8960 }, { "epoch": 0.48, "learning_rate": 4.6913170608227574e-05, "loss": 0.8181, "step": 8965 }, { "epoch": 0.48, "learning_rate": 4.69097988432111e-05, "loss": 0.774, "step": 8970 }, { "epoch": 0.48, "learning_rate": 4.690642535900378e-05, "loss": 0.7599, "step": 8975 }, { "epoch": 0.48, "learning_rate": 4.690305015587033e-05, "loss": 0.7864, "step": 8980 }, { "epoch": 0.48, "learning_rate": 4.6899673234075604e-05, "loss": 0.9758, "step": 8985 }, { "epoch": 0.48, "learning_rate": 4.689629459388456e-05, "loss": 0.7617, "step": 8990 }, { "epoch": 0.48, "learning_rate": 4.6892914235562313e-05, "loss": 0.7715, "step": 8995 }, { "epoch": 0.48, "learning_rate": 4.6889532159374114e-05, "loss": 0.8653, "step": 9000 }, { "epoch": 0.48, "learning_rate": 4.688614836558534e-05, "loss": 0.7397, "step": 9005 }, { "epoch": 0.48, "learning_rate": 4.68827628544615e-05, "loss": 0.8779, "step": 9010 }, { "epoch": 0.48, "learning_rate": 4.6879375626268265e-05, "loss": 0.6842, "step": 9015 }, { "epoch": 0.48, "learning_rate": 4.6875986681271396e-05, "loss": 0.7844, "step": 9020 }, { "epoch": 0.48, "learning_rate": 4.687259601973683e-05, "loss": 0.8776, "step": 9025 }, { "epoch": 0.48, "learning_rate": 4.6869203641930604e-05, "loss": 0.8472, "step": 9030 }, { "epoch": 0.48, "learning_rate": 4.686580954811892e-05, "loss": 0.7991, "step": 9035 }, { "epoch": 0.48, "learning_rate": 4.68624137385681e-05, "loss": 0.8891, "step": 9040 }, { "epoch": 0.48, "learning_rate": 4.68590162135446e-05, "loss": 0.8141, "step": 9045 }, { "epoch": 0.48, "learning_rate": 4.6855616973315005e-05, "loss": 0.7931, "step": 9050 }, { "epoch": 0.48, "learning_rate": 4.6852216018146064e-05, "loss": 0.8202, "step": 9055 }, { "epoch": 0.48, "learning_rate": 4.684881334830462e-05, "loss": 0.6925, "step": 9060 }, { "epoch": 0.48, "learning_rate": 4.684540896405767e-05, "loss": 0.7158, "step": 9065 }, { "epoch": 0.49, "learning_rate": 4.684200286567235e-05, "loss": 0.7355, "step": 9070 }, { "epoch": 0.49, "learning_rate": 4.6838595053415926e-05, "loss": 0.7648, "step": 9075 }, { "epoch": 0.49, "learning_rate": 4.68351855275558e-05, "loss": 0.821, "step": 9080 }, { "epoch": 0.49, "learning_rate": 4.683177428835951e-05, "loss": 0.8214, "step": 9085 }, { "epoch": 0.49, "learning_rate": 4.6828361336094705e-05, "loss": 0.6237, "step": 9090 }, { "epoch": 0.49, "learning_rate": 4.682494667102921e-05, "loss": 0.9444, "step": 9095 }, { "epoch": 0.49, "learning_rate": 4.682153029343095e-05, "loss": 0.7413, "step": 9100 }, { "epoch": 0.49, "learning_rate": 4.6818112203568e-05, "loss": 0.8892, "step": 9105 }, { "epoch": 0.49, "learning_rate": 4.681469240170857e-05, "loss": 0.8794, "step": 9110 }, { "epoch": 0.49, "learning_rate": 4.6811270888121006e-05, "loss": 0.756, "step": 9115 }, { "epoch": 0.49, "learning_rate": 4.680784766307377e-05, "loss": 0.7525, "step": 9120 }, { "epoch": 0.49, "learning_rate": 4.680442272683547e-05, "loss": 0.9392, "step": 9125 }, { "epoch": 0.49, "learning_rate": 4.680099607967487e-05, "loss": 0.7322, "step": 9130 }, { "epoch": 0.49, "learning_rate": 4.679756772186083e-05, "loss": 0.8301, "step": 9135 }, { "epoch": 0.49, "learning_rate": 4.679413765366236e-05, "loss": 0.724, "step": 9140 }, { "epoch": 0.49, "learning_rate": 4.6790705875348614e-05, "loss": 0.8354, "step": 9145 }, { "epoch": 0.49, "learning_rate": 4.678727238718888e-05, "loss": 0.8303, "step": 9150 }, { "epoch": 0.49, "learning_rate": 4.6783837189452565e-05, "loss": 0.9175, "step": 9155 }, { "epoch": 0.49, "learning_rate": 4.678040028240921e-05, "loss": 0.8232, "step": 9160 }, { "epoch": 0.49, "learning_rate": 4.67769616663285e-05, "loss": 0.8086, "step": 9165 }, { "epoch": 0.49, "learning_rate": 4.677352134148026e-05, "loss": 0.9872, "step": 9170 }, { "epoch": 0.49, "learning_rate": 4.677007930813445e-05, "loss": 0.6335, "step": 9175 }, { "epoch": 0.49, "learning_rate": 4.676663556656114e-05, "loss": 0.8807, "step": 9180 }, { "epoch": 0.49, "learning_rate": 4.6763190117030543e-05, "loss": 0.7734, "step": 9185 }, { "epoch": 0.49, "learning_rate": 4.6759742959813027e-05, "loss": 0.8999, "step": 9190 }, { "epoch": 0.49, "learning_rate": 4.675629409517907e-05, "loss": 0.8552, "step": 9195 }, { "epoch": 0.49, "learning_rate": 4.6752843523399305e-05, "loss": 0.6333, "step": 9200 }, { "epoch": 0.49, "learning_rate": 4.674939124474447e-05, "loss": 0.7171, "step": 9205 }, { "epoch": 0.49, "learning_rate": 4.674593725948547e-05, "loss": 0.8502, "step": 9210 }, { "epoch": 0.49, "learning_rate": 4.6742481567893324e-05, "loss": 0.7937, "step": 9215 }, { "epoch": 0.49, "learning_rate": 4.6739024170239175e-05, "loss": 0.7489, "step": 9220 }, { "epoch": 0.49, "learning_rate": 4.673556506679434e-05, "loss": 0.8656, "step": 9225 }, { "epoch": 0.49, "learning_rate": 4.6732104257830226e-05, "loss": 0.9049, "step": 9230 }, { "epoch": 0.49, "learning_rate": 4.672864174361839e-05, "loss": 0.8415, "step": 9235 }, { "epoch": 0.49, "learning_rate": 4.6725177524430524e-05, "loss": 0.6675, "step": 9240 }, { "epoch": 0.49, "learning_rate": 4.6721711600538466e-05, "loss": 0.8559, "step": 9245 }, { "epoch": 0.49, "learning_rate": 4.671824397221416e-05, "loss": 0.6655, "step": 9250 }, { "epoch": 0.5, "learning_rate": 4.671477463972972e-05, "loss": 0.8897, "step": 9255 }, { "epoch": 0.5, "learning_rate": 4.671130360335735e-05, "loss": 0.6493, "step": 9260 }, { "epoch": 0.5, "learning_rate": 4.670783086336943e-05, "loss": 0.8354, "step": 9265 }, { "epoch": 0.5, "learning_rate": 4.670435642003844e-05, "loss": 0.8187, "step": 9270 }, { "epoch": 0.5, "learning_rate": 4.670088027363703e-05, "loss": 0.8215, "step": 9275 }, { "epoch": 0.5, "learning_rate": 4.6697402424437934e-05, "loss": 0.7948, "step": 9280 }, { "epoch": 0.5, "learning_rate": 4.6693922872714055e-05, "loss": 0.8136, "step": 9285 }, { "epoch": 0.5, "learning_rate": 4.669044161873845e-05, "loss": 0.7081, "step": 9290 }, { "epoch": 0.5, "learning_rate": 4.668695866278424e-05, "loss": 0.7798, "step": 9295 }, { "epoch": 0.5, "learning_rate": 4.668347400512474e-05, "loss": 0.8168, "step": 9300 }, { "epoch": 0.5, "learning_rate": 4.667998764603339e-05, "loss": 0.843, "step": 9305 }, { "epoch": 0.5, "learning_rate": 4.667649958578374e-05, "loss": 0.7505, "step": 9310 }, { "epoch": 0.5, "learning_rate": 4.6673009824649495e-05, "loss": 0.7307, "step": 9315 }, { "epoch": 0.5, "learning_rate": 4.666951836290448e-05, "loss": 0.867, "step": 9320 }, { "epoch": 0.5, "learning_rate": 4.666602520082265e-05, "loss": 0.8448, "step": 9325 }, { "epoch": 0.5, "learning_rate": 4.6662530338678126e-05, "loss": 0.8344, "step": 9330 }, { "epoch": 0.5, "learning_rate": 4.665903377674511e-05, "loss": 0.9449, "step": 9335 }, { "epoch": 0.5, "learning_rate": 4.6655535515297985e-05, "loss": 0.6916, "step": 9340 }, { "epoch": 0.5, "learning_rate": 4.6652035554611243e-05, "loss": 0.866, "step": 9345 }, { "epoch": 0.5, "learning_rate": 4.664853389495952e-05, "loss": 0.9566, "step": 9350 }, { "epoch": 0.5, "learning_rate": 4.6645030536617565e-05, "loss": 0.7587, "step": 9355 }, { "epoch": 0.5, "learning_rate": 4.664152547986029e-05, "loss": 0.8226, "step": 9360 }, { "epoch": 0.5, "learning_rate": 4.663801872496273e-05, "loss": 0.8038, "step": 9365 }, { "epoch": 0.5, "learning_rate": 4.6634510272200024e-05, "loss": 0.8376, "step": 9370 }, { "epoch": 0.5, "learning_rate": 4.663100012184749e-05, "loss": 0.7771, "step": 9375 }, { "epoch": 0.5, "learning_rate": 4.6627488274180554e-05, "loss": 0.769, "step": 9380 }, { "epoch": 0.5, "learning_rate": 4.662397472947477e-05, "loss": 0.846, "step": 9385 }, { "epoch": 0.5, "learning_rate": 4.662045948800585e-05, "loss": 0.7619, "step": 9390 }, { "epoch": 0.5, "learning_rate": 4.661694255004961e-05, "loss": 0.7534, "step": 9395 }, { "epoch": 0.5, "learning_rate": 4.6613423915882014e-05, "loss": 0.805, "step": 9400 }, { "epoch": 0.5, "learning_rate": 4.660990358577917e-05, "loss": 0.9179, "step": 9405 }, { "epoch": 0.5, "learning_rate": 4.66063815600173e-05, "loss": 0.711, "step": 9410 }, { "epoch": 0.5, "learning_rate": 4.660285783887276e-05, "loss": 0.9107, "step": 9415 }, { "epoch": 0.5, "learning_rate": 4.659933242262204e-05, "loss": 0.666, "step": 9420 }, { "epoch": 0.5, "learning_rate": 4.6595805311541793e-05, "loss": 0.7529, "step": 9425 }, { "epoch": 0.5, "learning_rate": 4.659227650590876e-05, "loss": 0.8221, "step": 9430 }, { "epoch": 0.5, "learning_rate": 4.658874600599984e-05, "loss": 0.7809, "step": 9435 }, { "epoch": 0.51, "learning_rate": 4.658521381209206e-05, "loss": 0.749, "step": 9440 }, { "epoch": 0.51, "learning_rate": 4.658167992446257e-05, "loss": 0.636, "step": 9445 }, { "epoch": 0.51, "learning_rate": 4.6578144343388676e-05, "loss": 0.7896, "step": 9450 }, { "epoch": 0.51, "learning_rate": 4.6574607069147804e-05, "loss": 0.884, "step": 9455 }, { "epoch": 0.51, "learning_rate": 4.65710681020175e-05, "loss": 0.9028, "step": 9460 }, { "epoch": 0.51, "learning_rate": 4.656752744227547e-05, "loss": 0.7473, "step": 9465 }, { "epoch": 0.51, "learning_rate": 4.656398509019954e-05, "loss": 0.6627, "step": 9470 }, { "epoch": 0.51, "learning_rate": 4.6560441046067636e-05, "loss": 0.7747, "step": 9475 }, { "epoch": 0.51, "learning_rate": 4.6556895310157886e-05, "loss": 0.7177, "step": 9480 }, { "epoch": 0.51, "learning_rate": 4.655334788274849e-05, "loss": 0.5813, "step": 9485 }, { "epoch": 0.51, "learning_rate": 4.6549798764117814e-05, "loss": 0.7672, "step": 9490 }, { "epoch": 0.51, "learning_rate": 4.654624795454433e-05, "loss": 0.8411, "step": 9495 }, { "epoch": 0.51, "learning_rate": 4.654269545430668e-05, "loss": 0.802, "step": 9500 }, { "epoch": 0.51, "learning_rate": 4.653914126368361e-05, "loss": 0.8177, "step": 9505 }, { "epoch": 0.51, "learning_rate": 4.653558538295399e-05, "loss": 0.9998, "step": 9510 }, { "epoch": 0.51, "learning_rate": 4.6532027812396864e-05, "loss": 0.765, "step": 9515 }, { "epoch": 0.51, "learning_rate": 4.652846855229136e-05, "loss": 0.8213, "step": 9520 }, { "epoch": 0.51, "learning_rate": 4.6524907602916776e-05, "loss": 0.7947, "step": 9525 }, { "epoch": 0.51, "learning_rate": 4.652134496455251e-05, "loss": 0.8174, "step": 9530 }, { "epoch": 0.51, "learning_rate": 4.651778063747815e-05, "loss": 0.7767, "step": 9535 }, { "epoch": 0.51, "learning_rate": 4.651421462197333e-05, "loss": 0.8102, "step": 9540 }, { "epoch": 0.51, "learning_rate": 4.65106469183179e-05, "loss": 0.7869, "step": 9545 }, { "epoch": 0.51, "learning_rate": 4.650707752679178e-05, "loss": 0.8698, "step": 9550 }, { "epoch": 0.51, "learning_rate": 4.650350644767507e-05, "loss": 0.7939, "step": 9555 }, { "epoch": 0.51, "learning_rate": 4.649993368124797e-05, "loss": 0.8774, "step": 9560 }, { "epoch": 0.51, "learning_rate": 4.649635922779081e-05, "loss": 0.9838, "step": 9565 }, { "epoch": 0.51, "learning_rate": 4.649278308758409e-05, "loss": 0.8833, "step": 9570 }, { "epoch": 0.51, "learning_rate": 4.648920526090841e-05, "loss": 0.763, "step": 9575 }, { "epoch": 0.51, "learning_rate": 4.6485625748044506e-05, "loss": 0.7466, "step": 9580 }, { "epoch": 0.51, "learning_rate": 4.648204454927325e-05, "loss": 0.759, "step": 9585 }, { "epoch": 0.51, "learning_rate": 4.647846166487566e-05, "loss": 0.6762, "step": 9590 }, { "epoch": 0.51, "learning_rate": 4.6474877095132854e-05, "loss": 0.7819, "step": 9595 }, { "epoch": 0.51, "learning_rate": 4.647129084032612e-05, "loss": 0.7402, "step": 9600 }, { "epoch": 0.51, "learning_rate": 4.646770290073684e-05, "loss": 0.8402, "step": 9605 }, { "epoch": 0.51, "learning_rate": 4.646411327664657e-05, "loss": 0.913, "step": 9610 }, { "epoch": 0.51, "learning_rate": 4.646052196833696e-05, "loss": 0.8166, "step": 9615 }, { "epoch": 0.51, "learning_rate": 4.645692897608982e-05, "loss": 0.8481, "step": 9620 }, { "epoch": 0.51, "learning_rate": 4.645333430018707e-05, "loss": 0.7684, "step": 9625 }, { "epoch": 0.52, "learning_rate": 4.6449737940910766e-05, "loss": 0.6542, "step": 9630 }, { "epoch": 0.52, "learning_rate": 4.6446139898543124e-05, "loss": 0.9037, "step": 9635 }, { "epoch": 0.52, "learning_rate": 4.644254017336645e-05, "loss": 0.7863, "step": 9640 }, { "epoch": 0.52, "learning_rate": 4.643893876566323e-05, "loss": 0.7283, "step": 9645 }, { "epoch": 0.52, "learning_rate": 4.643533567571603e-05, "loss": 0.8001, "step": 9650 }, { "epoch": 0.52, "learning_rate": 4.6431730903807574e-05, "loss": 0.9337, "step": 9655 }, { "epoch": 0.52, "learning_rate": 4.642812445022073e-05, "loss": 0.7278, "step": 9660 }, { "epoch": 0.52, "learning_rate": 4.6424516315238476e-05, "loss": 0.771, "step": 9665 }, { "epoch": 0.52, "learning_rate": 4.642090649914393e-05, "loss": 0.7458, "step": 9670 }, { "epoch": 0.52, "learning_rate": 4.641729500222034e-05, "loss": 0.8958, "step": 9675 }, { "epoch": 0.52, "learning_rate": 4.64136818247511e-05, "loss": 0.7685, "step": 9680 }, { "epoch": 0.52, "learning_rate": 4.6410066967019716e-05, "loss": 0.9039, "step": 9685 }, { "epoch": 0.52, "learning_rate": 4.640645042930985e-05, "loss": 0.7324, "step": 9690 }, { "epoch": 0.52, "learning_rate": 4.6402832211905244e-05, "loss": 0.9173, "step": 9695 }, { "epoch": 0.52, "learning_rate": 4.639921231508983e-05, "loss": 0.8128, "step": 9700 }, { "epoch": 0.52, "learning_rate": 4.639559073914767e-05, "loss": 0.7656, "step": 9705 }, { "epoch": 0.52, "learning_rate": 4.63919674843629e-05, "loss": 0.6259, "step": 9710 }, { "epoch": 0.52, "learning_rate": 4.638834255101985e-05, "loss": 0.7749, "step": 9715 }, { "epoch": 0.52, "learning_rate": 4.638471593940293e-05, "loss": 0.8371, "step": 9720 }, { "epoch": 0.52, "learning_rate": 4.638108764979675e-05, "loss": 0.8643, "step": 9725 }, { "epoch": 0.52, "learning_rate": 4.637745768248597e-05, "loss": 0.8237, "step": 9730 }, { "epoch": 0.52, "learning_rate": 4.6373826037755454e-05, "loss": 0.8107, "step": 9735 }, { "epoch": 0.52, "learning_rate": 4.637019271589014e-05, "loss": 0.8004, "step": 9740 }, { "epoch": 0.52, "learning_rate": 4.636655771717513e-05, "loss": 0.7253, "step": 9745 }, { "epoch": 0.52, "learning_rate": 4.636292104189566e-05, "loss": 0.8304, "step": 9750 }, { "epoch": 0.52, "learning_rate": 4.635928269033708e-05, "loss": 0.8822, "step": 9755 }, { "epoch": 0.52, "learning_rate": 4.635564266278488e-05, "loss": 0.8726, "step": 9760 }, { "epoch": 0.52, "learning_rate": 4.635200095952468e-05, "loss": 0.8136, "step": 9765 }, { "epoch": 0.52, "learning_rate": 4.6348357580842246e-05, "loss": 0.8866, "step": 9770 }, { "epoch": 0.52, "learning_rate": 4.6344712527023435e-05, "loss": 0.804, "step": 9775 }, { "epoch": 0.52, "learning_rate": 4.634106579835429e-05, "loss": 0.671, "step": 9780 }, { "epoch": 0.52, "learning_rate": 4.633741739512095e-05, "loss": 0.8072, "step": 9785 }, { "epoch": 0.52, "learning_rate": 4.633376731760968e-05, "loss": 0.6597, "step": 9790 }, { "epoch": 0.52, "learning_rate": 4.63301155661069e-05, "loss": 0.7634, "step": 9795 }, { "epoch": 0.52, "learning_rate": 4.6326462140899154e-05, "loss": 0.7513, "step": 9800 }, { "epoch": 0.52, "learning_rate": 4.632280704227311e-05, "loss": 0.8291, "step": 9805 }, { "epoch": 0.52, "learning_rate": 4.6319150270515576e-05, "loss": 0.7837, "step": 9810 }, { "epoch": 0.53, "learning_rate": 4.6315491825913484e-05, "loss": 0.8024, "step": 9815 }, { "epoch": 0.53, "learning_rate": 4.63118317087539e-05, "loss": 0.8076, "step": 9820 }, { "epoch": 0.53, "learning_rate": 4.630816991932402e-05, "loss": 0.6843, "step": 9825 }, { "epoch": 0.53, "learning_rate": 4.630450645791118e-05, "loss": 0.7388, "step": 9830 }, { "epoch": 0.53, "learning_rate": 4.630084132480282e-05, "loss": 0.7083, "step": 9835 }, { "epoch": 0.53, "learning_rate": 4.629717452028656e-05, "loss": 0.8184, "step": 9840 }, { "epoch": 0.53, "learning_rate": 4.6293506044650105e-05, "loss": 0.8329, "step": 9845 }, { "epoch": 0.53, "learning_rate": 4.628983589818131e-05, "loss": 0.7568, "step": 9850 }, { "epoch": 0.53, "learning_rate": 4.628616408116816e-05, "loss": 0.9127, "step": 9855 }, { "epoch": 0.53, "learning_rate": 4.628249059389877e-05, "loss": 0.82, "step": 9860 }, { "epoch": 0.53, "learning_rate": 4.627881543666139e-05, "loss": 0.7945, "step": 9865 }, { "epoch": 0.53, "learning_rate": 4.6275138609744396e-05, "loss": 0.7832, "step": 9870 }, { "epoch": 0.53, "learning_rate": 4.6271460113436295e-05, "loss": 0.8366, "step": 9875 }, { "epoch": 0.53, "learning_rate": 4.626777994802572e-05, "loss": 0.6796, "step": 9880 }, { "epoch": 0.53, "learning_rate": 4.626409811380147e-05, "loss": 0.8064, "step": 9885 }, { "epoch": 0.53, "learning_rate": 4.626041461105241e-05, "loss": 0.6502, "step": 9890 }, { "epoch": 0.53, "learning_rate": 4.625672944006759e-05, "loss": 0.7781, "step": 9895 }, { "epoch": 0.53, "learning_rate": 4.625304260113617e-05, "loss": 0.6633, "step": 9900 }, { "epoch": 0.53, "learning_rate": 4.6249354094547456e-05, "loss": 0.9537, "step": 9905 }, { "epoch": 0.53, "learning_rate": 4.6245663920590856e-05, "loss": 0.7638, "step": 9910 }, { "epoch": 0.53, "learning_rate": 4.624197207955594e-05, "loss": 0.6981, "step": 9915 }, { "epoch": 0.53, "learning_rate": 4.6238278571732386e-05, "loss": 0.7026, "step": 9920 }, { "epoch": 0.53, "learning_rate": 4.6234583397410017e-05, "loss": 0.6439, "step": 9925 }, { "epoch": 0.53, "learning_rate": 4.6230886556878775e-05, "loss": 0.8046, "step": 9930 }, { "epoch": 0.53, "learning_rate": 4.6227188050428734e-05, "loss": 0.6273, "step": 9935 }, { "epoch": 0.53, "learning_rate": 4.622348787835012e-05, "loss": 0.7367, "step": 9940 }, { "epoch": 0.53, "learning_rate": 4.621978604093326e-05, "loss": 0.7979, "step": 9945 }, { "epoch": 0.53, "learning_rate": 4.621608253846864e-05, "loss": 0.7118, "step": 9950 }, { "epoch": 0.53, "learning_rate": 4.6212377371246845e-05, "loss": 0.8271, "step": 9955 }, { "epoch": 0.53, "learning_rate": 4.620867053955862e-05, "loss": 0.8968, "step": 9960 }, { "epoch": 0.53, "learning_rate": 4.620496204369482e-05, "loss": 0.779, "step": 9965 }, { "epoch": 0.53, "learning_rate": 4.620125188394644e-05, "loss": 0.8184, "step": 9970 }, { "epoch": 0.53, "learning_rate": 4.619754006060461e-05, "loss": 0.8621, "step": 9975 }, { "epoch": 0.53, "learning_rate": 4.619382657396057e-05, "loss": 0.8337, "step": 9980 }, { "epoch": 0.53, "learning_rate": 4.619011142430574e-05, "loss": 0.8226, "step": 9985 }, { "epoch": 0.53, "learning_rate": 4.618639461193159e-05, "loss": 0.7623, "step": 9990 }, { "epoch": 0.53, "learning_rate": 4.61826761371298e-05, "loss": 0.7875, "step": 9995 }, { "epoch": 0.53, "learning_rate": 4.6178956000192127e-05, "loss": 0.7813, "step": 10000 }, { "epoch": 0.54, "learning_rate": 4.617523420141049e-05, "loss": 0.7784, "step": 10005 }, { "epoch": 0.54, "learning_rate": 4.6171510741076916e-05, "loss": 0.7646, "step": 10010 }, { "epoch": 0.54, "learning_rate": 4.6167785619483585e-05, "loss": 0.7866, "step": 10015 }, { "epoch": 0.54, "learning_rate": 4.616405883692279e-05, "loss": 0.7771, "step": 10020 }, { "epoch": 0.54, "learning_rate": 4.616033039368695e-05, "loss": 0.7228, "step": 10025 }, { "epoch": 0.54, "learning_rate": 4.615660029006864e-05, "loss": 0.8959, "step": 10030 }, { "epoch": 0.54, "learning_rate": 4.615286852636054e-05, "loss": 0.9012, "step": 10035 }, { "epoch": 0.54, "learning_rate": 4.6149135102855466e-05, "loss": 0.7588, "step": 10040 }, { "epoch": 0.54, "learning_rate": 4.614540001984638e-05, "loss": 0.592, "step": 10045 }, { "epoch": 0.54, "learning_rate": 4.614166327762635e-05, "loss": 0.7225, "step": 10050 }, { "epoch": 0.54, "learning_rate": 4.6137924876488596e-05, "loss": 0.7672, "step": 10055 }, { "epoch": 0.54, "learning_rate": 4.6134184816726446e-05, "loss": 0.8455, "step": 10060 }, { "epoch": 0.54, "learning_rate": 4.613044309863338e-05, "loss": 0.8741, "step": 10065 }, { "epoch": 0.54, "learning_rate": 4.6126699722502994e-05, "loss": 0.894, "step": 10070 }, { "epoch": 0.54, "learning_rate": 4.612295468862903e-05, "loss": 0.7773, "step": 10075 }, { "epoch": 0.54, "learning_rate": 4.611920799730532e-05, "loss": 0.7497, "step": 10080 }, { "epoch": 0.54, "learning_rate": 4.611545964882589e-05, "loss": 0.7893, "step": 10085 }, { "epoch": 0.54, "learning_rate": 4.6111709643484844e-05, "loss": 0.821, "step": 10090 }, { "epoch": 0.54, "learning_rate": 4.610795798157642e-05, "loss": 0.9373, "step": 10095 }, { "epoch": 0.54, "learning_rate": 4.610420466339502e-05, "loss": 0.8326, "step": 10100 }, { "epoch": 0.54, "learning_rate": 4.6100449689235145e-05, "loss": 0.7538, "step": 10105 }, { "epoch": 0.54, "learning_rate": 4.6096693059391446e-05, "loss": 0.656, "step": 10110 }, { "epoch": 0.54, "learning_rate": 4.609293477415868e-05, "loss": 0.7189, "step": 10115 }, { "epoch": 0.54, "learning_rate": 4.6089174833831744e-05, "loss": 0.9002, "step": 10120 }, { "epoch": 0.54, "learning_rate": 4.608541323870568e-05, "loss": 0.6934, "step": 10125 }, { "epoch": 0.54, "learning_rate": 4.6081649989075646e-05, "loss": 0.7381, "step": 10130 }, { "epoch": 0.54, "learning_rate": 4.607788508523693e-05, "loss": 0.7228, "step": 10135 }, { "epoch": 0.54, "learning_rate": 4.607411852748495e-05, "loss": 0.7072, "step": 10140 }, { "epoch": 0.54, "learning_rate": 4.6070350316115266e-05, "loss": 0.5865, "step": 10145 }, { "epoch": 0.54, "learning_rate": 4.606658045142355e-05, "loss": 0.8025, "step": 10150 }, { "epoch": 0.54, "learning_rate": 4.6062808933705604e-05, "loss": 0.8481, "step": 10155 }, { "epoch": 0.54, "learning_rate": 4.605903576325737e-05, "loss": 0.7504, "step": 10160 }, { "epoch": 0.54, "learning_rate": 4.6055260940374924e-05, "loss": 0.8906, "step": 10165 }, { "epoch": 0.54, "learning_rate": 4.605148446535446e-05, "loss": 0.7604, "step": 10170 }, { "epoch": 0.54, "learning_rate": 4.60477063384923e-05, "loss": 0.8846, "step": 10175 }, { "epoch": 0.54, "learning_rate": 4.6043926560084916e-05, "loss": 0.7143, "step": 10180 }, { "epoch": 0.54, "learning_rate": 4.604014513042888e-05, "loss": 0.7782, "step": 10185 }, { "epoch": 0.55, "learning_rate": 4.6036362049820914e-05, "loss": 0.7974, "step": 10190 }, { "epoch": 0.55, "learning_rate": 4.603257731855787e-05, "loss": 0.8214, "step": 10195 }, { "epoch": 0.55, "learning_rate": 4.602879093693671e-05, "loss": 0.7688, "step": 10200 }, { "epoch": 0.55, "learning_rate": 4.602500290525455e-05, "loss": 0.7307, "step": 10205 }, { "epoch": 0.55, "learning_rate": 4.6021213223808624e-05, "loss": 0.7982, "step": 10210 }, { "epoch": 0.55, "learning_rate": 4.6017421892896287e-05, "loss": 0.9078, "step": 10215 }, { "epoch": 0.55, "learning_rate": 4.601362891281504e-05, "loss": 0.9685, "step": 10220 }, { "epoch": 0.55, "learning_rate": 4.600983428386251e-05, "loss": 0.7428, "step": 10225 }, { "epoch": 0.55, "learning_rate": 4.600603800633645e-05, "loss": 0.8188, "step": 10230 }, { "epoch": 0.55, "learning_rate": 4.600224008053472e-05, "loss": 0.6804, "step": 10235 }, { "epoch": 0.55, "learning_rate": 4.5998440506755356e-05, "loss": 0.8504, "step": 10240 }, { "epoch": 0.55, "learning_rate": 4.599463928529649e-05, "loss": 0.9281, "step": 10245 }, { "epoch": 0.55, "learning_rate": 4.5990836416456385e-05, "loss": 0.7945, "step": 10250 }, { "epoch": 0.55, "learning_rate": 4.598703190053344e-05, "loss": 0.6668, "step": 10255 }, { "epoch": 0.55, "learning_rate": 4.59832257378262e-05, "loss": 0.8056, "step": 10260 }, { "epoch": 0.55, "learning_rate": 4.5979417928633305e-05, "loss": 0.7964, "step": 10265 }, { "epoch": 0.55, "learning_rate": 4.597560847325355e-05, "loss": 1.035, "step": 10270 }, { "epoch": 0.55, "learning_rate": 4.597179737198584e-05, "loss": 0.8906, "step": 10275 }, { "epoch": 0.55, "learning_rate": 4.5967984625129235e-05, "loss": 0.7478, "step": 10280 }, { "epoch": 0.55, "learning_rate": 4.59641702329829e-05, "loss": 0.8341, "step": 10285 }, { "epoch": 0.55, "learning_rate": 4.5960354195846136e-05, "loss": 0.7146, "step": 10290 }, { "epoch": 0.55, "learning_rate": 4.595653651401838e-05, "loss": 0.7813, "step": 10295 }, { "epoch": 0.55, "learning_rate": 4.595271718779919e-05, "loss": 0.7597, "step": 10300 }, { "epoch": 0.55, "learning_rate": 4.594889621748825e-05, "loss": 0.8173, "step": 10305 }, { "epoch": 0.55, "learning_rate": 4.594507360338539e-05, "loss": 0.6584, "step": 10310 }, { "epoch": 0.55, "learning_rate": 4.594124934579056e-05, "loss": 0.7892, "step": 10315 }, { "epoch": 0.55, "learning_rate": 4.593742344500384e-05, "loss": 0.7498, "step": 10320 }, { "epoch": 0.55, "learning_rate": 4.593359590132541e-05, "loss": 0.7586, "step": 10325 }, { "epoch": 0.55, "learning_rate": 4.592976671505563e-05, "loss": 0.7721, "step": 10330 }, { "epoch": 0.55, "learning_rate": 4.5925935886494955e-05, "loss": 0.7091, "step": 10335 }, { "epoch": 0.55, "learning_rate": 4.5922103415943986e-05, "loss": 0.8232, "step": 10340 }, { "epoch": 0.55, "learning_rate": 4.5918269303703425e-05, "loss": 0.7667, "step": 10345 }, { "epoch": 0.55, "learning_rate": 4.5914433550074145e-05, "loss": 0.8969, "step": 10350 }, { "epoch": 0.55, "learning_rate": 4.591059615535711e-05, "loss": 0.8925, "step": 10355 }, { "epoch": 0.55, "learning_rate": 4.5906757119853435e-05, "loss": 0.7632, "step": 10360 }, { "epoch": 0.55, "learning_rate": 4.5902916443864354e-05, "loss": 0.8219, "step": 10365 }, { "epoch": 0.55, "learning_rate": 4.589907412769123e-05, "loss": 0.9253, "step": 10370 }, { "epoch": 0.56, "learning_rate": 4.589523017163557e-05, "loss": 0.7863, "step": 10375 }, { "epoch": 0.56, "learning_rate": 4.5891384575998974e-05, "loss": 0.8144, "step": 10380 }, { "epoch": 0.56, "learning_rate": 4.588753734108321e-05, "loss": 0.6786, "step": 10385 }, { "epoch": 0.56, "learning_rate": 4.588368846719016e-05, "loss": 0.7238, "step": 10390 }, { "epoch": 0.56, "learning_rate": 4.587983795462183e-05, "loss": 0.7105, "step": 10395 }, { "epoch": 0.56, "learning_rate": 4.587598580368034e-05, "loss": 0.8256, "step": 10400 }, { "epoch": 0.56, "learning_rate": 4.587213201466798e-05, "loss": 0.738, "step": 10405 }, { "epoch": 0.56, "learning_rate": 4.586827658788714e-05, "loss": 0.6176, "step": 10410 }, { "epoch": 0.56, "learning_rate": 4.586441952364032e-05, "loss": 0.759, "step": 10415 }, { "epoch": 0.56, "learning_rate": 4.5860560822230206e-05, "loss": 0.795, "step": 10420 }, { "epoch": 0.56, "learning_rate": 4.585670048395955e-05, "loss": 0.7823, "step": 10425 }, { "epoch": 0.56, "learning_rate": 4.585283850913128e-05, "loss": 0.8893, "step": 10430 }, { "epoch": 0.56, "learning_rate": 4.584897489804841e-05, "loss": 0.7336, "step": 10435 }, { "epoch": 0.56, "learning_rate": 4.584510965101413e-05, "loss": 0.7736, "step": 10440 }, { "epoch": 0.56, "learning_rate": 4.5841242768331713e-05, "loss": 0.8292, "step": 10445 }, { "epoch": 0.56, "learning_rate": 4.583737425030459e-05, "loss": 0.8093, "step": 10450 }, { "epoch": 0.56, "learning_rate": 4.583350409723631e-05, "loss": 0.8556, "step": 10455 }, { "epoch": 0.56, "learning_rate": 4.582963230943056e-05, "loss": 0.7383, "step": 10460 }, { "epoch": 0.56, "learning_rate": 4.582575888719113e-05, "loss": 0.8949, "step": 10465 }, { "epoch": 0.56, "learning_rate": 4.5821883830821966e-05, "loss": 0.8084, "step": 10470 }, { "epoch": 0.56, "learning_rate": 4.581800714062713e-05, "loss": 0.7045, "step": 10475 }, { "epoch": 0.56, "learning_rate": 4.5814128816910805e-05, "loss": 0.7557, "step": 10480 }, { "epoch": 0.56, "learning_rate": 4.581024885997732e-05, "loss": 0.859, "step": 10485 }, { "epoch": 0.56, "learning_rate": 4.5806367270131125e-05, "loss": 0.7317, "step": 10490 }, { "epoch": 0.56, "learning_rate": 4.5802484047676784e-05, "loss": 0.8322, "step": 10495 }, { "epoch": 0.56, "learning_rate": 4.5798599192919014e-05, "loss": 0.6434, "step": 10500 }, { "epoch": 0.56, "learning_rate": 4.579471270616264e-05, "loss": 0.8798, "step": 10505 }, { "epoch": 0.56, "learning_rate": 4.579082458771261e-05, "loss": 0.731, "step": 10510 }, { "epoch": 0.56, "learning_rate": 4.578693483787404e-05, "loss": 0.7318, "step": 10515 }, { "epoch": 0.56, "learning_rate": 4.5783043456952126e-05, "loss": 0.8649, "step": 10520 }, { "epoch": 0.56, "learning_rate": 4.577915044525221e-05, "loss": 0.7678, "step": 10525 }, { "epoch": 0.56, "learning_rate": 4.5775255803079776e-05, "loss": 0.6588, "step": 10530 }, { "epoch": 0.56, "learning_rate": 4.577135953074042e-05, "loss": 0.764, "step": 10535 }, { "epoch": 0.56, "learning_rate": 4.576746162853986e-05, "loss": 0.9417, "step": 10540 }, { "epoch": 0.56, "learning_rate": 4.576356209678396e-05, "loss": 0.7035, "step": 10545 }, { "epoch": 0.56, "learning_rate": 4.5759660935778716e-05, "loss": 0.7982, "step": 10550 }, { "epoch": 0.56, "learning_rate": 4.575575814583022e-05, "loss": 0.6423, "step": 10555 }, { "epoch": 0.56, "learning_rate": 4.575185372724472e-05, "loss": 0.7089, "step": 10560 }, { "epoch": 0.57, "learning_rate": 4.5747947680328574e-05, "loss": 0.6855, "step": 10565 }, { "epoch": 0.57, "learning_rate": 4.574404000538829e-05, "loss": 0.784, "step": 10570 }, { "epoch": 0.57, "learning_rate": 4.574013070273049e-05, "loss": 0.7017, "step": 10575 }, { "epoch": 0.57, "learning_rate": 4.5736219772661906e-05, "loss": 0.7412, "step": 10580 }, { "epoch": 0.57, "learning_rate": 4.573230721548944e-05, "loss": 0.8399, "step": 10585 }, { "epoch": 0.57, "learning_rate": 4.572839303152008e-05, "loss": 0.75, "step": 10590 }, { "epoch": 0.57, "learning_rate": 4.5724477221060965e-05, "loss": 0.8845, "step": 10595 }, { "epoch": 0.57, "learning_rate": 4.572055978441937e-05, "loss": 0.7167, "step": 10600 }, { "epoch": 0.57, "learning_rate": 4.571664072190266e-05, "loss": 0.8206, "step": 10605 }, { "epoch": 0.57, "learning_rate": 4.571272003381836e-05, "loss": 0.7914, "step": 10610 }, { "epoch": 0.57, "learning_rate": 4.570879772047412e-05, "loss": 0.9396, "step": 10615 }, { "epoch": 0.57, "learning_rate": 4.5704873782177704e-05, "loss": 0.8118, "step": 10620 }, { "epoch": 0.57, "learning_rate": 4.5700948219237015e-05, "loss": 0.7251, "step": 10625 }, { "epoch": 0.57, "learning_rate": 4.569702103196008e-05, "loss": 0.7845, "step": 10630 }, { "epoch": 0.57, "learning_rate": 4.569309222065505e-05, "loss": 0.68, "step": 10635 }, { "epoch": 0.57, "learning_rate": 4.56891617856302e-05, "loss": 0.7675, "step": 10640 }, { "epoch": 0.57, "learning_rate": 4.568522972719395e-05, "loss": 0.663, "step": 10645 }, { "epoch": 0.57, "learning_rate": 4.568129604565483e-05, "loss": 0.7192, "step": 10650 }, { "epoch": 0.57, "learning_rate": 4.5677360741321495e-05, "loss": 0.7118, "step": 10655 }, { "epoch": 0.57, "learning_rate": 4.5673423814502754e-05, "loss": 0.7883, "step": 10660 }, { "epoch": 0.57, "learning_rate": 4.566948526550751e-05, "loss": 0.7901, "step": 10665 }, { "epoch": 0.57, "learning_rate": 4.566554509464482e-05, "loss": 0.7413, "step": 10670 }, { "epoch": 0.57, "learning_rate": 4.566160330222384e-05, "loss": 0.8076, "step": 10675 }, { "epoch": 0.57, "learning_rate": 4.565765988855389e-05, "loss": 0.7808, "step": 10680 }, { "epoch": 0.57, "learning_rate": 4.565371485394438e-05, "loss": 0.7337, "step": 10685 }, { "epoch": 0.57, "learning_rate": 4.5649768198704867e-05, "loss": 0.768, "step": 10690 }, { "epoch": 0.57, "learning_rate": 4.564581992314504e-05, "loss": 0.6409, "step": 10695 }, { "epoch": 0.57, "learning_rate": 4.564187002757471e-05, "loss": 0.9355, "step": 10700 }, { "epoch": 0.57, "learning_rate": 4.563791851230379e-05, "loss": 0.9525, "step": 10705 }, { "epoch": 0.57, "learning_rate": 4.563396537764238e-05, "loss": 0.7921, "step": 10710 }, { "epoch": 0.57, "learning_rate": 4.563001062390062e-05, "loss": 0.6906, "step": 10715 }, { "epoch": 0.57, "learning_rate": 4.562605425138887e-05, "loss": 0.8456, "step": 10720 }, { "epoch": 0.57, "learning_rate": 4.5622096260417556e-05, "loss": 0.7792, "step": 10725 }, { "epoch": 0.57, "learning_rate": 4.5618136651297255e-05, "loss": 0.6354, "step": 10730 }, { "epoch": 0.57, "learning_rate": 4.561417542433866e-05, "loss": 0.7007, "step": 10735 }, { "epoch": 0.57, "learning_rate": 4.561021257985259e-05, "loss": 0.8787, "step": 10740 }, { "epoch": 0.57, "learning_rate": 4.5606248118150005e-05, "loss": 0.7196, "step": 10745 }, { "epoch": 0.58, "learning_rate": 4.5602282039541984e-05, "loss": 0.938, "step": 10750 }, { "epoch": 0.58, "learning_rate": 4.559831434433973e-05, "loss": 0.8162, "step": 10755 }, { "epoch": 0.58, "learning_rate": 4.5594345032854566e-05, "loss": 0.8315, "step": 10760 }, { "epoch": 0.58, "learning_rate": 4.559037410539797e-05, "loss": 0.9728, "step": 10765 }, { "epoch": 0.58, "learning_rate": 4.558640156228151e-05, "loss": 0.9368, "step": 10770 }, { "epoch": 0.58, "learning_rate": 4.5582427403816906e-05, "loss": 0.7606, "step": 10775 }, { "epoch": 0.58, "learning_rate": 4.557845163031601e-05, "loss": 0.7466, "step": 10780 }, { "epoch": 0.58, "learning_rate": 4.557447424209076e-05, "loss": 0.9268, "step": 10785 }, { "epoch": 0.58, "learning_rate": 4.557049523945327e-05, "loss": 0.8981, "step": 10790 }, { "epoch": 0.58, "learning_rate": 4.556651462271575e-05, "loss": 0.98, "step": 10795 }, { "epoch": 0.58, "learning_rate": 4.5562532392190556e-05, "loss": 0.8729, "step": 10800 }, { "epoch": 0.58, "learning_rate": 4.555854854819015e-05, "loss": 0.6925, "step": 10805 }, { "epoch": 0.58, "learning_rate": 4.555456309102714e-05, "loss": 0.8089, "step": 10810 }, { "epoch": 0.58, "learning_rate": 4.555057602101424e-05, "loss": 0.6679, "step": 10815 }, { "epoch": 0.58, "learning_rate": 4.5546587338464316e-05, "loss": 0.7568, "step": 10820 }, { "epoch": 0.58, "learning_rate": 4.554259704369034e-05, "loss": 0.8799, "step": 10825 }, { "epoch": 0.58, "learning_rate": 4.5538605137005416e-05, "loss": 0.827, "step": 10830 }, { "epoch": 0.58, "learning_rate": 4.553461161872278e-05, "loss": 0.7771, "step": 10835 }, { "epoch": 0.58, "learning_rate": 4.5530616489155785e-05, "loss": 0.6974, "step": 10840 }, { "epoch": 0.58, "learning_rate": 4.552661974861793e-05, "loss": 0.8431, "step": 10845 }, { "epoch": 0.58, "learning_rate": 4.5522621397422805e-05, "loss": 0.818, "step": 10850 }, { "epoch": 0.58, "learning_rate": 4.551862143588416e-05, "loss": 0.8314, "step": 10855 }, { "epoch": 0.58, "learning_rate": 4.551461986431587e-05, "loss": 0.7711, "step": 10860 }, { "epoch": 0.58, "learning_rate": 4.551061668303189e-05, "loss": 0.7723, "step": 10865 }, { "epoch": 0.58, "learning_rate": 4.550661189234638e-05, "loss": 0.897, "step": 10870 }, { "epoch": 0.58, "learning_rate": 4.550260549257356e-05, "loss": 0.7719, "step": 10875 }, { "epoch": 0.58, "learning_rate": 4.549859748402779e-05, "loss": 0.6446, "step": 10880 }, { "epoch": 0.58, "learning_rate": 4.549458786702358e-05, "loss": 0.91, "step": 10885 }, { "epoch": 0.58, "learning_rate": 4.549057664187556e-05, "loss": 0.7295, "step": 10890 }, { "epoch": 0.58, "learning_rate": 4.5486563808898465e-05, "loss": 0.7739, "step": 10895 }, { "epoch": 0.58, "learning_rate": 4.548254936840716e-05, "loss": 0.8616, "step": 10900 }, { "epoch": 0.58, "learning_rate": 4.5478533320716664e-05, "loss": 0.6855, "step": 10905 }, { "epoch": 0.58, "learning_rate": 4.547451566614209e-05, "loss": 0.7904, "step": 10910 }, { "epoch": 0.58, "learning_rate": 4.5470496404998694e-05, "loss": 0.7656, "step": 10915 }, { "epoch": 0.58, "learning_rate": 4.546647553760186e-05, "loss": 0.6199, "step": 10920 }, { "epoch": 0.58, "learning_rate": 4.5462453064267085e-05, "loss": 0.7672, "step": 10925 }, { "epoch": 0.58, "learning_rate": 4.545842898531001e-05, "loss": 0.9704, "step": 10930 }, { "epoch": 0.58, "learning_rate": 4.5454403301046376e-05, "loss": 0.8178, "step": 10935 }, { "epoch": 0.59, "learning_rate": 4.5450376011792076e-05, "loss": 0.7388, "step": 10940 }, { "epoch": 0.59, "learning_rate": 4.544634711786312e-05, "loss": 0.8091, "step": 10945 }, { "epoch": 0.59, "learning_rate": 4.544231661957563e-05, "loss": 0.8149, "step": 10950 }, { "epoch": 0.59, "learning_rate": 4.543828451724588e-05, "loss": 0.6827, "step": 10955 }, { "epoch": 0.59, "learning_rate": 4.543425081119024e-05, "loss": 0.7941, "step": 10960 }, { "epoch": 0.59, "learning_rate": 4.543021550172524e-05, "loss": 0.8306, "step": 10965 }, { "epoch": 0.59, "learning_rate": 4.542617858916751e-05, "loss": 0.9001, "step": 10970 }, { "epoch": 0.59, "learning_rate": 4.542214007383381e-05, "loss": 0.6837, "step": 10975 }, { "epoch": 0.59, "learning_rate": 4.541809995604104e-05, "loss": 0.758, "step": 10980 }, { "epoch": 0.59, "learning_rate": 4.541405823610619e-05, "loss": 0.8964, "step": 10985 }, { "epoch": 0.59, "learning_rate": 4.541001491434643e-05, "loss": 0.8149, "step": 10990 }, { "epoch": 0.59, "learning_rate": 4.540596999107901e-05, "loss": 0.5443, "step": 10995 }, { "epoch": 0.59, "learning_rate": 4.540192346662133e-05, "loss": 0.7648, "step": 11000 }, { "epoch": 0.59, "learning_rate": 4.5397875341290906e-05, "loss": 0.7131, "step": 11005 }, { "epoch": 0.59, "learning_rate": 4.539382561540537e-05, "loss": 0.714, "step": 11010 }, { "epoch": 0.59, "learning_rate": 4.5389774289282506e-05, "loss": 0.6926, "step": 11015 }, { "epoch": 0.59, "learning_rate": 4.5385721363240205e-05, "loss": 0.7779, "step": 11020 }, { "epoch": 0.59, "learning_rate": 4.538166683759648e-05, "loss": 0.7415, "step": 11025 }, { "epoch": 0.59, "learning_rate": 4.537761071266948e-05, "loss": 0.7751, "step": 11030 }, { "epoch": 0.59, "learning_rate": 4.537355298877747e-05, "loss": 0.8838, "step": 11035 }, { "epoch": 0.59, "learning_rate": 4.536949366623887e-05, "loss": 0.723, "step": 11040 }, { "epoch": 0.59, "learning_rate": 4.5365432745372173e-05, "loss": 1.0116, "step": 11045 }, { "epoch": 0.59, "learning_rate": 4.5361370226496034e-05, "loss": 0.7479, "step": 11050 }, { "epoch": 0.59, "learning_rate": 4.535730610992924e-05, "loss": 0.7617, "step": 11055 }, { "epoch": 0.59, "learning_rate": 4.535324039599067e-05, "loss": 0.8237, "step": 11060 }, { "epoch": 0.59, "learning_rate": 4.5349173084999366e-05, "loss": 0.6832, "step": 11065 }, { "epoch": 0.59, "learning_rate": 4.534510417727445e-05, "loss": 0.7419, "step": 11070 }, { "epoch": 0.59, "learning_rate": 4.5341033673135235e-05, "loss": 0.7406, "step": 11075 }, { "epoch": 0.59, "learning_rate": 4.5336961572901084e-05, "loss": 0.704, "step": 11080 }, { "epoch": 0.59, "learning_rate": 4.533288787689154e-05, "loss": 0.854, "step": 11085 }, { "epoch": 0.59, "learning_rate": 4.532881258542625e-05, "loss": 0.8356, "step": 11090 }, { "epoch": 0.59, "learning_rate": 4.532473569882498e-05, "loss": 0.7699, "step": 11095 }, { "epoch": 0.59, "learning_rate": 4.5320657217407645e-05, "loss": 0.7127, "step": 11100 }, { "epoch": 0.59, "learning_rate": 4.531657714149427e-05, "loss": 0.7333, "step": 11105 }, { "epoch": 0.59, "learning_rate": 4.531249547140498e-05, "loss": 0.8154, "step": 11110 }, { "epoch": 0.59, "learning_rate": 4.530841220746008e-05, "loss": 0.7927, "step": 11115 }, { "epoch": 0.59, "learning_rate": 4.530432734997996e-05, "loss": 0.6829, "step": 11120 }, { "epoch": 0.6, "learning_rate": 4.530024089928513e-05, "loss": 0.6931, "step": 11125 }, { "epoch": 0.6, "learning_rate": 4.5296152855696275e-05, "loss": 0.6714, "step": 11130 }, { "epoch": 0.6, "learning_rate": 4.529206321953413e-05, "loss": 0.8873, "step": 11135 }, { "epoch": 0.6, "learning_rate": 4.5287971991119626e-05, "loss": 0.7947, "step": 11140 }, { "epoch": 0.6, "learning_rate": 4.528387917077378e-05, "loss": 0.7713, "step": 11145 }, { "epoch": 0.6, "learning_rate": 4.527978475881774e-05, "loss": 0.851, "step": 11150 }, { "epoch": 0.6, "learning_rate": 4.527568875557278e-05, "loss": 0.7505, "step": 11155 }, { "epoch": 0.6, "learning_rate": 4.52715911613603e-05, "loss": 0.7439, "step": 11160 }, { "epoch": 0.6, "learning_rate": 4.5267491976501834e-05, "loss": 0.7772, "step": 11165 }, { "epoch": 0.6, "learning_rate": 4.5263391201319016e-05, "loss": 0.7568, "step": 11170 }, { "epoch": 0.6, "learning_rate": 4.5259288836133635e-05, "loss": 0.9697, "step": 11175 }, { "epoch": 0.6, "learning_rate": 4.525518488126758e-05, "loss": 0.8261, "step": 11180 }, { "epoch": 0.6, "learning_rate": 4.525107933704289e-05, "loss": 0.7863, "step": 11185 }, { "epoch": 0.6, "learning_rate": 4.524697220378169e-05, "loss": 0.735, "step": 11190 }, { "epoch": 0.6, "learning_rate": 4.524286348180627e-05, "loss": 0.8028, "step": 11195 }, { "epoch": 0.6, "learning_rate": 4.5238753171439024e-05, "loss": 0.8267, "step": 11200 }, { "epoch": 0.6, "learning_rate": 4.5234641273002474e-05, "loss": 0.6984, "step": 11205 }, { "epoch": 0.6, "learning_rate": 4.523052778681928e-05, "loss": 0.8856, "step": 11210 }, { "epoch": 0.6, "learning_rate": 4.522641271321219e-05, "loss": 0.9242, "step": 11215 }, { "epoch": 0.6, "learning_rate": 4.522229605250412e-05, "loss": 0.6925, "step": 11220 }, { "epoch": 0.6, "learning_rate": 4.521817780501808e-05, "loss": 0.738, "step": 11225 }, { "epoch": 0.6, "learning_rate": 4.5214057971077216e-05, "loss": 0.837, "step": 11230 }, { "epoch": 0.6, "learning_rate": 4.520993655100481e-05, "loss": 0.8385, "step": 11235 }, { "epoch": 0.6, "learning_rate": 4.5205813545124244e-05, "loss": 0.7381, "step": 11240 }, { "epoch": 0.6, "learning_rate": 4.520168895375905e-05, "loss": 0.8823, "step": 11245 }, { "epoch": 0.6, "learning_rate": 4.519756277723285e-05, "loss": 0.6183, "step": 11250 }, { "epoch": 0.6, "learning_rate": 4.519343501586943e-05, "loss": 0.7952, "step": 11255 }, { "epoch": 0.6, "learning_rate": 4.518930566999267e-05, "loss": 0.6961, "step": 11260 }, { "epoch": 0.6, "learning_rate": 4.51851747399266e-05, "loss": 0.8361, "step": 11265 }, { "epoch": 0.6, "learning_rate": 4.5181042225995344e-05, "loss": 0.6481, "step": 11270 }, { "epoch": 0.6, "learning_rate": 4.517690812852319e-05, "loss": 0.8791, "step": 11275 }, { "epoch": 0.6, "learning_rate": 4.51727724478345e-05, "loss": 0.8789, "step": 11280 }, { "epoch": 0.6, "learning_rate": 4.5168635184253805e-05, "loss": 0.8676, "step": 11285 }, { "epoch": 0.6, "learning_rate": 4.5165324233925155e-05, "loss": 0.8545, "step": 11290 }, { "epoch": 0.6, "learning_rate": 4.516118412195701e-05, "loss": 0.6939, "step": 11295 }, { "epoch": 0.6, "learning_rate": 4.5157042428006165e-05, "loss": 0.79, "step": 11300 }, { "epoch": 0.6, "learning_rate": 4.515289915239759e-05, "loss": 0.7704, "step": 11305 }, { "epoch": 0.61, "learning_rate": 4.514875429545639e-05, "loss": 0.8389, "step": 11310 }, { "epoch": 0.61, "learning_rate": 4.514460785750782e-05, "loss": 0.9743, "step": 11315 }, { "epoch": 0.61, "learning_rate": 4.514045983887721e-05, "loss": 0.6996, "step": 11320 }, { "epoch": 0.61, "learning_rate": 4.513631023989007e-05, "loss": 0.905, "step": 11325 }, { "epoch": 0.61, "learning_rate": 4.5132159060871985e-05, "loss": 0.6121, "step": 11330 }, { "epoch": 0.61, "learning_rate": 4.512800630214869e-05, "loss": 0.7722, "step": 11335 }, { "epoch": 0.61, "learning_rate": 4.5123851964046045e-05, "loss": 0.7694, "step": 11340 }, { "epoch": 0.61, "learning_rate": 4.511969604689001e-05, "loss": 0.8299, "step": 11345 }, { "epoch": 0.61, "learning_rate": 4.511553855100671e-05, "loss": 0.8293, "step": 11350 }, { "epoch": 0.61, "learning_rate": 4.511137947672236e-05, "loss": 0.662, "step": 11355 }, { "epoch": 0.61, "learning_rate": 4.5107218824363306e-05, "loss": 0.88, "step": 11360 }, { "epoch": 0.61, "learning_rate": 4.510305659425602e-05, "loss": 0.9379, "step": 11365 }, { "epoch": 0.61, "learning_rate": 4.509889278672711e-05, "loss": 0.7759, "step": 11370 }, { "epoch": 0.61, "learning_rate": 4.509472740210328e-05, "loss": 0.8812, "step": 11375 }, { "epoch": 0.61, "learning_rate": 4.509056044071138e-05, "loss": 0.8354, "step": 11380 }, { "epoch": 0.61, "learning_rate": 4.508639190287839e-05, "loss": 0.8905, "step": 11385 }, { "epoch": 0.61, "learning_rate": 4.5082221788931384e-05, "loss": 0.7021, "step": 11390 }, { "epoch": 0.61, "learning_rate": 4.507805009919759e-05, "loss": 0.7593, "step": 11395 }, { "epoch": 0.61, "learning_rate": 4.507387683400434e-05, "loss": 0.8714, "step": 11400 }, { "epoch": 0.61, "learning_rate": 4.506970199367909e-05, "loss": 0.671, "step": 11405 }, { "epoch": 0.61, "learning_rate": 4.506552557854945e-05, "loss": 0.6942, "step": 11410 }, { "epoch": 0.61, "learning_rate": 4.50613475889431e-05, "loss": 0.6638, "step": 11415 }, { "epoch": 0.61, "learning_rate": 4.50571680251879e-05, "loss": 0.7896, "step": 11420 }, { "epoch": 0.61, "learning_rate": 4.505298688761179e-05, "loss": 0.7905, "step": 11425 }, { "epoch": 0.61, "learning_rate": 4.5048804176542855e-05, "loss": 0.8022, "step": 11430 }, { "epoch": 0.61, "learning_rate": 4.5044619892309295e-05, "loss": 0.7792, "step": 11435 }, { "epoch": 0.61, "learning_rate": 4.504043403523944e-05, "loss": 0.6195, "step": 11440 }, { "epoch": 0.61, "learning_rate": 4.5036246605661754e-05, "loss": 0.7328, "step": 11445 }, { "epoch": 0.61, "learning_rate": 4.5032057603904785e-05, "loss": 0.757, "step": 11450 }, { "epoch": 0.61, "learning_rate": 4.5027867030297257e-05, "loss": 0.7619, "step": 11455 }, { "epoch": 0.61, "learning_rate": 4.5023674885167966e-05, "loss": 0.7739, "step": 11460 }, { "epoch": 0.61, "learning_rate": 4.501948116884587e-05, "loss": 0.8146, "step": 11465 }, { "epoch": 0.61, "learning_rate": 4.501528588166004e-05, "loss": 0.7016, "step": 11470 }, { "epoch": 0.61, "learning_rate": 4.5011089023939655e-05, "loss": 0.8032, "step": 11475 }, { "epoch": 0.61, "learning_rate": 4.500689059601403e-05, "loss": 0.7545, "step": 11480 }, { "epoch": 0.61, "learning_rate": 4.5002690598212616e-05, "loss": 0.8386, "step": 11485 }, { "epoch": 0.61, "learning_rate": 4.499848903086497e-05, "loss": 0.7125, "step": 11490 }, { "epoch": 0.61, "learning_rate": 4.499428589430075e-05, "loss": 0.7925, "step": 11495 }, { "epoch": 0.62, "learning_rate": 4.499008118884979e-05, "loss": 0.7435, "step": 11500 }, { "epoch": 0.62, "learning_rate": 4.4985874914842015e-05, "loss": 0.8457, "step": 11505 }, { "epoch": 0.62, "learning_rate": 4.498166707260747e-05, "loss": 0.8124, "step": 11510 }, { "epoch": 0.62, "learning_rate": 4.497745766247634e-05, "loss": 0.8022, "step": 11515 }, { "epoch": 0.62, "learning_rate": 4.4973246684778905e-05, "loss": 0.7935, "step": 11520 }, { "epoch": 0.62, "learning_rate": 4.496903413984561e-05, "loss": 0.7948, "step": 11525 }, { "epoch": 0.62, "learning_rate": 4.496482002800699e-05, "loss": 0.7123, "step": 11530 }, { "epoch": 0.62, "learning_rate": 4.496060434959371e-05, "loss": 0.6831, "step": 11535 }, { "epoch": 0.62, "learning_rate": 4.495638710493656e-05, "loss": 0.5735, "step": 11540 }, { "epoch": 0.62, "learning_rate": 4.495216829436646e-05, "loss": 0.6868, "step": 11545 }, { "epoch": 0.62, "learning_rate": 4.4947947918214444e-05, "loss": 0.8139, "step": 11550 }, { "epoch": 0.62, "learning_rate": 4.4943725976811666e-05, "loss": 0.7321, "step": 11555 }, { "epoch": 0.62, "learning_rate": 4.493950247048942e-05, "loss": 0.6833, "step": 11560 }, { "epoch": 0.62, "learning_rate": 4.49361225389123e-05, "loss": 0.7848, "step": 11565 }, { "epoch": 0.62, "learning_rate": 4.4931896216570216e-05, "loss": 0.733, "step": 11570 }, { "epoch": 0.62, "learning_rate": 4.4927668330236895e-05, "loss": 0.7849, "step": 11575 }, { "epoch": 0.62, "learning_rate": 4.4923438880244094e-05, "loss": 0.8142, "step": 11580 }, { "epoch": 0.62, "learning_rate": 4.4919207866923674e-05, "loss": 0.8539, "step": 11585 }, { "epoch": 0.62, "learning_rate": 4.4914975290607644e-05, "loss": 0.8426, "step": 11590 }, { "epoch": 0.62, "learning_rate": 4.49107411516281e-05, "loss": 0.8092, "step": 11595 }, { "epoch": 0.62, "learning_rate": 4.49065054503173e-05, "loss": 0.8492, "step": 11600 }, { "epoch": 0.62, "learning_rate": 4.49022681870076e-05, "loss": 0.7114, "step": 11605 }, { "epoch": 0.62, "learning_rate": 4.4898029362031486e-05, "loss": 0.7255, "step": 11610 }, { "epoch": 0.62, "learning_rate": 4.489378897572155e-05, "loss": 0.6828, "step": 11615 }, { "epoch": 0.62, "learning_rate": 4.488954702841054e-05, "loss": 0.8522, "step": 11620 }, { "epoch": 0.62, "learning_rate": 4.48853035204313e-05, "loss": 0.7798, "step": 11625 }, { "epoch": 0.62, "learning_rate": 4.4881058452116803e-05, "loss": 0.6972, "step": 11630 }, { "epoch": 0.62, "learning_rate": 4.487681182380015e-05, "loss": 0.7316, "step": 11635 }, { "epoch": 0.62, "learning_rate": 4.4872563635814555e-05, "loss": 0.7366, "step": 11640 }, { "epoch": 0.62, "learning_rate": 4.486831388849336e-05, "loss": 0.7457, "step": 11645 }, { "epoch": 0.62, "learning_rate": 4.486406258217003e-05, "loss": 0.8564, "step": 11650 }, { "epoch": 0.62, "learning_rate": 4.485980971717816e-05, "loss": 0.7643, "step": 11655 }, { "epoch": 0.62, "learning_rate": 4.4855555293851445e-05, "loss": 0.7106, "step": 11660 }, { "epoch": 0.62, "learning_rate": 4.485129931252373e-05, "loss": 0.7829, "step": 11665 }, { "epoch": 0.62, "learning_rate": 4.484704177352895e-05, "loss": 0.9266, "step": 11670 }, { "epoch": 0.62, "learning_rate": 4.484278267720119e-05, "loss": 0.9619, "step": 11675 }, { "epoch": 0.62, "learning_rate": 4.4838522023874655e-05, "loss": 0.9012, "step": 11680 }, { "epoch": 0.63, "learning_rate": 4.483425981388365e-05, "loss": 0.8963, "step": 11685 }, { "epoch": 0.63, "learning_rate": 4.4829996047562626e-05, "loss": 0.7888, "step": 11690 }, { "epoch": 0.63, "learning_rate": 4.482573072524615e-05, "loss": 0.8111, "step": 11695 }, { "epoch": 0.63, "learning_rate": 4.482146384726889e-05, "loss": 0.804, "step": 11700 }, { "epoch": 0.63, "learning_rate": 4.4817195413965684e-05, "loss": 0.7884, "step": 11705 }, { "epoch": 0.63, "learning_rate": 4.4812925425671435e-05, "loss": 0.8359, "step": 11710 }, { "epoch": 0.63, "learning_rate": 4.4808653882721205e-05, "loss": 0.8497, "step": 11715 }, { "epoch": 0.63, "learning_rate": 4.480438078545017e-05, "loss": 0.7625, "step": 11720 }, { "epoch": 0.63, "learning_rate": 4.480010613419363e-05, "loss": 0.8469, "step": 11725 }, { "epoch": 0.63, "learning_rate": 4.4795829929286983e-05, "loss": 0.6867, "step": 11730 }, { "epoch": 0.63, "learning_rate": 4.4791552171065793e-05, "loss": 0.7706, "step": 11735 }, { "epoch": 0.63, "learning_rate": 4.478727285986571e-05, "loss": 0.7705, "step": 11740 }, { "epoch": 0.63, "learning_rate": 4.4782991996022516e-05, "loss": 0.7344, "step": 11745 }, { "epoch": 0.63, "learning_rate": 4.4778709579872125e-05, "loss": 0.649, "step": 11750 }, { "epoch": 0.63, "learning_rate": 4.477442561175056e-05, "loss": 0.7686, "step": 11755 }, { "epoch": 0.63, "learning_rate": 4.4770140091993975e-05, "loss": 0.7635, "step": 11760 }, { "epoch": 0.63, "learning_rate": 4.4765853020938616e-05, "loss": 0.8724, "step": 11765 }, { "epoch": 0.63, "learning_rate": 4.476156439892092e-05, "loss": 0.7394, "step": 11770 }, { "epoch": 0.63, "learning_rate": 4.475727422627736e-05, "loss": 0.878, "step": 11775 }, { "epoch": 0.63, "learning_rate": 4.475298250334459e-05, "loss": 0.7403, "step": 11780 }, { "epoch": 0.63, "learning_rate": 4.474868923045937e-05, "loss": 0.6866, "step": 11785 }, { "epoch": 0.63, "learning_rate": 4.474439440795857e-05, "loss": 0.6778, "step": 11790 }, { "epoch": 0.63, "learning_rate": 4.47400980361792e-05, "loss": 0.8495, "step": 11795 }, { "epoch": 0.63, "learning_rate": 4.4735800115458376e-05, "loss": 0.8818, "step": 11800 }, { "epoch": 0.63, "learning_rate": 4.4731500646133344e-05, "loss": 0.7179, "step": 11805 }, { "epoch": 0.63, "learning_rate": 4.4727199628541474e-05, "loss": 0.7991, "step": 11810 }, { "epoch": 0.63, "learning_rate": 4.4722897063020244e-05, "loss": 0.7647, "step": 11815 }, { "epoch": 0.63, "learning_rate": 4.471859294990727e-05, "loss": 0.7148, "step": 11820 }, { "epoch": 0.63, "learning_rate": 4.471428728954027e-05, "loss": 0.7418, "step": 11825 }, { "epoch": 0.63, "learning_rate": 4.470998008225711e-05, "loss": 0.7474, "step": 11830 }, { "epoch": 0.63, "learning_rate": 4.470567132839575e-05, "loss": 0.7828, "step": 11835 }, { "epoch": 0.63, "learning_rate": 4.47013610282943e-05, "loss": 0.8215, "step": 11840 }, { "epoch": 0.63, "learning_rate": 4.469704918229096e-05, "loss": 0.8326, "step": 11845 }, { "epoch": 0.63, "learning_rate": 4.469273579072407e-05, "loss": 0.8397, "step": 11850 }, { "epoch": 0.63, "learning_rate": 4.46884208539321e-05, "loss": 0.6773, "step": 11855 }, { "epoch": 0.63, "learning_rate": 4.468410437225361e-05, "loss": 0.822, "step": 11860 }, { "epoch": 0.63, "learning_rate": 4.4679786346027305e-05, "loss": 0.7635, "step": 11865 }, { "epoch": 0.64, "learning_rate": 4.4675466775592016e-05, "loss": 0.7476, "step": 11870 }, { "epoch": 0.64, "learning_rate": 4.467114566128668e-05, "loss": 0.7371, "step": 11875 }, { "epoch": 0.64, "learning_rate": 4.4666823003450365e-05, "loss": 0.8943, "step": 11880 }, { "epoch": 0.64, "learning_rate": 4.4662498802422245e-05, "loss": 0.7084, "step": 11885 }, { "epoch": 0.64, "learning_rate": 4.4658173058541644e-05, "loss": 0.9649, "step": 11890 }, { "epoch": 0.64, "learning_rate": 4.4653845772147976e-05, "loss": 0.758, "step": 11895 }, { "epoch": 0.64, "learning_rate": 4.464951694358078e-05, "loss": 0.737, "step": 11900 }, { "epoch": 0.64, "learning_rate": 4.464518657317974e-05, "loss": 0.9439, "step": 11905 }, { "epoch": 0.64, "learning_rate": 4.464085466128465e-05, "loss": 0.8029, "step": 11910 }, { "epoch": 0.64, "learning_rate": 4.463652120823541e-05, "loss": 0.8127, "step": 11915 }, { "epoch": 0.64, "learning_rate": 4.4632186214372056e-05, "loss": 0.7715, "step": 11920 }, { "epoch": 0.64, "learning_rate": 4.462784968003474e-05, "loss": 0.8689, "step": 11925 }, { "epoch": 0.64, "learning_rate": 4.4623511605563736e-05, "loss": 0.7674, "step": 11930 }, { "epoch": 0.64, "learning_rate": 4.461917199129944e-05, "loss": 0.8544, "step": 11935 }, { "epoch": 0.64, "learning_rate": 4.4614830837582364e-05, "loss": 0.8383, "step": 11940 }, { "epoch": 0.64, "learning_rate": 4.4610488144753157e-05, "loss": 0.8173, "step": 11945 }, { "epoch": 0.64, "learning_rate": 4.460614391315255e-05, "loss": 0.6548, "step": 11950 }, { "epoch": 0.64, "learning_rate": 4.460179814312145e-05, "loss": 0.7145, "step": 11955 }, { "epoch": 0.64, "learning_rate": 4.4597450835000835e-05, "loss": 0.6893, "step": 11960 }, { "epoch": 0.64, "learning_rate": 4.459310198913183e-05, "loss": 0.6765, "step": 11965 }, { "epoch": 0.64, "learning_rate": 4.4588751605855686e-05, "loss": 0.8187, "step": 11970 }, { "epoch": 0.64, "learning_rate": 4.458439968551374e-05, "loss": 0.7609, "step": 11975 }, { "epoch": 0.64, "learning_rate": 4.45800462284475e-05, "loss": 0.8555, "step": 11980 }, { "epoch": 0.64, "learning_rate": 4.457569123499854e-05, "loss": 0.8014, "step": 11985 }, { "epoch": 0.64, "learning_rate": 4.4571334705508607e-05, "loss": 0.7674, "step": 11990 }, { "epoch": 0.64, "learning_rate": 4.4566976640319527e-05, "loss": 0.6711, "step": 11995 }, { "epoch": 0.64, "learning_rate": 4.456261703977327e-05, "loss": 0.6891, "step": 12000 }, { "epoch": 0.64, "learning_rate": 4.455825590421192e-05, "loss": 0.7042, "step": 12005 }, { "epoch": 0.64, "learning_rate": 4.455389323397768e-05, "loss": 0.8806, "step": 12010 }, { "epoch": 0.64, "learning_rate": 4.4549529029412884e-05, "loss": 0.8583, "step": 12015 }, { "epoch": 0.64, "learning_rate": 4.454516329085996e-05, "loss": 0.8913, "step": 12020 }, { "epoch": 0.64, "learning_rate": 4.454079601866148e-05, "loss": 0.6946, "step": 12025 }, { "epoch": 0.64, "learning_rate": 4.4536427213160134e-05, "loss": 0.8268, "step": 12030 }, { "epoch": 0.64, "learning_rate": 4.453205687469872e-05, "loss": 0.8099, "step": 12035 }, { "epoch": 0.64, "learning_rate": 4.452768500362017e-05, "loss": 0.7899, "step": 12040 }, { "epoch": 0.64, "learning_rate": 4.4523311600267535e-05, "loss": 0.805, "step": 12045 }, { "epoch": 0.64, "learning_rate": 4.451893666498397e-05, "loss": 0.6887, "step": 12050 }, { "epoch": 0.64, "learning_rate": 4.451456019811277e-05, "loss": 0.6805, "step": 12055 }, { "epoch": 0.65, "learning_rate": 4.4510182199997344e-05, "loss": 0.7977, "step": 12060 }, { "epoch": 0.65, "learning_rate": 4.450580267098121e-05, "loss": 0.8631, "step": 12065 }, { "epoch": 0.65, "learning_rate": 4.4501421611408024e-05, "loss": 0.707, "step": 12070 }, { "epoch": 0.65, "learning_rate": 4.449703902162156e-05, "loss": 0.6694, "step": 12075 }, { "epoch": 0.65, "learning_rate": 4.449265490196568e-05, "loss": 0.7346, "step": 12080 }, { "epoch": 0.65, "learning_rate": 4.448826925278442e-05, "loss": 0.7558, "step": 12085 }, { "epoch": 0.65, "learning_rate": 4.448388207442189e-05, "loss": 0.6895, "step": 12090 }, { "epoch": 0.65, "learning_rate": 4.4479493367222346e-05, "loss": 0.8271, "step": 12095 }, { "epoch": 0.65, "learning_rate": 4.447510313153015e-05, "loss": 0.6803, "step": 12100 }, { "epoch": 0.65, "learning_rate": 4.44707113676898e-05, "loss": 0.7462, "step": 12105 }, { "epoch": 0.65, "learning_rate": 4.446631807604589e-05, "loss": 0.6744, "step": 12110 }, { "epoch": 0.65, "learning_rate": 4.4461923256943147e-05, "loss": 0.6925, "step": 12115 }, { "epoch": 0.65, "learning_rate": 4.4457526910726434e-05, "loss": 0.7863, "step": 12120 }, { "epoch": 0.65, "learning_rate": 4.445312903774071e-05, "loss": 0.8851, "step": 12125 }, { "epoch": 0.65, "learning_rate": 4.4448729638331056e-05, "loss": 0.8196, "step": 12130 }, { "epoch": 0.65, "learning_rate": 4.444432871284269e-05, "loss": 0.7028, "step": 12135 }, { "epoch": 0.65, "learning_rate": 4.443992626162092e-05, "loss": 0.7868, "step": 12140 }, { "epoch": 0.65, "learning_rate": 4.443552228501121e-05, "loss": 0.8979, "step": 12145 }, { "epoch": 0.65, "learning_rate": 4.443111678335911e-05, "loss": 0.7723, "step": 12150 }, { "epoch": 0.65, "learning_rate": 4.4426709757010324e-05, "loss": 0.8113, "step": 12155 }, { "epoch": 0.65, "learning_rate": 4.442230120631065e-05, "loss": 0.6194, "step": 12160 }, { "epoch": 0.65, "learning_rate": 4.4417891131606005e-05, "loss": 0.7462, "step": 12165 }, { "epoch": 0.65, "learning_rate": 4.441347953324244e-05, "loss": 0.8843, "step": 12170 }, { "epoch": 0.65, "learning_rate": 4.4409066411566114e-05, "loss": 0.7431, "step": 12175 }, { "epoch": 0.65, "learning_rate": 4.440465176692332e-05, "loss": 0.7888, "step": 12180 }, { "epoch": 0.65, "learning_rate": 4.440023559966045e-05, "loss": 0.8514, "step": 12185 }, { "epoch": 0.65, "learning_rate": 4.439581791012403e-05, "loss": 0.7558, "step": 12190 }, { "epoch": 0.65, "learning_rate": 4.439139869866071e-05, "loss": 0.7756, "step": 12195 }, { "epoch": 0.65, "learning_rate": 4.438697796561724e-05, "loss": 0.7798, "step": 12200 }, { "epoch": 0.65, "learning_rate": 4.43825557113405e-05, "loss": 0.6846, "step": 12205 }, { "epoch": 0.65, "learning_rate": 4.43781319361775e-05, "loss": 0.6656, "step": 12210 }, { "epoch": 0.65, "learning_rate": 4.437370664047535e-05, "loss": 0.6589, "step": 12215 }, { "epoch": 0.65, "learning_rate": 4.436927982458129e-05, "loss": 0.8864, "step": 12220 }, { "epoch": 0.65, "learning_rate": 4.436485148884268e-05, "loss": 0.9256, "step": 12225 }, { "epoch": 0.65, "learning_rate": 4.4360421633607005e-05, "loss": 0.8187, "step": 12230 }, { "epoch": 0.65, "learning_rate": 4.435599025922185e-05, "loss": 0.6863, "step": 12235 }, { "epoch": 0.65, "learning_rate": 4.4351557366034934e-05, "loss": 0.6832, "step": 12240 }, { "epoch": 0.66, "learning_rate": 4.43471229543941e-05, "loss": 0.6344, "step": 12245 }, { "epoch": 0.66, "learning_rate": 4.434268702464728e-05, "loss": 0.8072, "step": 12250 }, { "epoch": 0.66, "learning_rate": 4.4338249577142564e-05, "loss": 0.6251, "step": 12255 }, { "epoch": 0.66, "learning_rate": 4.433381061222814e-05, "loss": 0.8505, "step": 12260 }, { "epoch": 0.66, "learning_rate": 4.432937013025232e-05, "loss": 0.8873, "step": 12265 }, { "epoch": 0.66, "learning_rate": 4.4324928131563546e-05, "loss": 0.818, "step": 12270 }, { "epoch": 0.66, "learning_rate": 4.432048461651034e-05, "loss": 0.6063, "step": 12275 }, { "epoch": 0.66, "learning_rate": 4.43160395854414e-05, "loss": 0.7536, "step": 12280 }, { "epoch": 0.66, "learning_rate": 4.431159303870549e-05, "loss": 0.8557, "step": 12285 }, { "epoch": 0.66, "learning_rate": 4.430714497665153e-05, "loss": 0.7247, "step": 12290 }, { "epoch": 0.66, "learning_rate": 4.430269539962854e-05, "loss": 0.8816, "step": 12295 }, { "epoch": 0.66, "learning_rate": 4.429824430798566e-05, "loss": 0.8217, "step": 12300 }, { "epoch": 0.66, "learning_rate": 4.429379170207215e-05, "loss": 0.6246, "step": 12305 }, { "epoch": 0.66, "learning_rate": 4.428933758223741e-05, "loss": 0.9075, "step": 12310 }, { "epoch": 0.66, "learning_rate": 4.428488194883093e-05, "loss": 0.7784, "step": 12315 }, { "epoch": 0.66, "learning_rate": 4.4280424802202315e-05, "loss": 0.7788, "step": 12320 }, { "epoch": 0.66, "learning_rate": 4.427596614270133e-05, "loss": 0.8232, "step": 12325 }, { "epoch": 0.66, "learning_rate": 4.427150597067781e-05, "loss": 0.8237, "step": 12330 }, { "epoch": 0.66, "learning_rate": 4.426704428648174e-05, "loss": 0.8094, "step": 12335 }, { "epoch": 0.66, "learning_rate": 4.426258109046321e-05, "loss": 0.7078, "step": 12340 }, { "epoch": 0.66, "learning_rate": 4.425811638297243e-05, "loss": 0.8493, "step": 12345 }, { "epoch": 0.66, "learning_rate": 4.4253650164359726e-05, "loss": 0.813, "step": 12350 }, { "epoch": 0.66, "learning_rate": 4.424918243497557e-05, "loss": 0.8007, "step": 12355 }, { "epoch": 0.66, "learning_rate": 4.4244713195170515e-05, "loss": 0.8579, "step": 12360 }, { "epoch": 0.66, "learning_rate": 4.424024244529524e-05, "loss": 0.9052, "step": 12365 }, { "epoch": 0.66, "learning_rate": 4.4235770185700575e-05, "loss": 0.7603, "step": 12370 }, { "epoch": 0.66, "learning_rate": 4.4231296416737425e-05, "loss": 0.7722, "step": 12375 }, { "epoch": 0.66, "learning_rate": 4.422682113875683e-05, "loss": 0.8097, "step": 12380 }, { "epoch": 0.66, "learning_rate": 4.4222344352109955e-05, "loss": 0.7279, "step": 12385 }, { "epoch": 0.66, "learning_rate": 4.421786605714808e-05, "loss": 0.7405, "step": 12390 }, { "epoch": 0.66, "learning_rate": 4.421338625422261e-05, "loss": 0.6884, "step": 12395 }, { "epoch": 0.66, "learning_rate": 4.4208904943685045e-05, "loss": 0.854, "step": 12400 }, { "epoch": 0.66, "learning_rate": 4.420442212588703e-05, "loss": 0.74, "step": 12405 }, { "epoch": 0.66, "learning_rate": 4.4199937801180314e-05, "loss": 0.672, "step": 12410 }, { "epoch": 0.66, "learning_rate": 4.419545196991677e-05, "loss": 0.8228, "step": 12415 }, { "epoch": 0.66, "learning_rate": 4.4190964632448384e-05, "loss": 0.8464, "step": 12420 }, { "epoch": 0.66, "learning_rate": 4.418647578912726e-05, "loss": 0.8099, "step": 12425 }, { "epoch": 0.66, "learning_rate": 4.4181985440305626e-05, "loss": 0.7404, "step": 12430 }, { "epoch": 0.67, "learning_rate": 4.417749358633582e-05, "loss": 0.6858, "step": 12435 }, { "epoch": 0.67, "learning_rate": 4.4173000227570315e-05, "loss": 0.7632, "step": 12440 }, { "epoch": 0.67, "learning_rate": 4.416850536436168e-05, "loss": 0.8343, "step": 12445 }, { "epoch": 0.67, "learning_rate": 4.416400899706261e-05, "loss": 0.8169, "step": 12450 }, { "epoch": 0.67, "learning_rate": 4.415951112602593e-05, "loss": 0.7808, "step": 12455 }, { "epoch": 0.67, "learning_rate": 4.415501175160458e-05, "loss": 0.759, "step": 12460 }, { "epoch": 0.67, "learning_rate": 4.415051087415159e-05, "loss": 0.8992, "step": 12465 }, { "epoch": 0.67, "learning_rate": 4.4146008494020144e-05, "loss": 0.9015, "step": 12470 }, { "epoch": 0.67, "learning_rate": 4.414150461156352e-05, "loss": 0.7866, "step": 12475 }, { "epoch": 0.67, "learning_rate": 4.4136999227135136e-05, "loss": 0.7988, "step": 12480 }, { "epoch": 0.67, "learning_rate": 4.41324923410885e-05, "loss": 0.8147, "step": 12485 }, { "epoch": 0.67, "learning_rate": 4.412798395377726e-05, "loss": 0.8618, "step": 12490 }, { "epoch": 0.67, "learning_rate": 4.412347406555518e-05, "loss": 0.7601, "step": 12495 }, { "epoch": 0.67, "learning_rate": 4.411896267677612e-05, "loss": 0.7719, "step": 12500 }, { "epoch": 0.67, "learning_rate": 4.41144497877941e-05, "loss": 0.7976, "step": 12505 }, { "epoch": 0.67, "learning_rate": 4.410993539896321e-05, "loss": 0.7467, "step": 12510 }, { "epoch": 0.67, "learning_rate": 4.410541951063768e-05, "loss": 0.6516, "step": 12515 }, { "epoch": 0.67, "learning_rate": 4.410090212317187e-05, "loss": 0.8148, "step": 12520 }, { "epoch": 0.67, "learning_rate": 4.409638323692024e-05, "loss": 0.9414, "step": 12525 }, { "epoch": 0.67, "learning_rate": 4.4091862852237355e-05, "loss": 0.9168, "step": 12530 }, { "epoch": 0.67, "learning_rate": 4.408734096947794e-05, "loss": 0.7589, "step": 12535 }, { "epoch": 0.67, "learning_rate": 4.4082817588996805e-05, "loss": 0.85, "step": 12540 }, { "epoch": 0.67, "learning_rate": 4.407829271114888e-05, "loss": 0.7882, "step": 12545 }, { "epoch": 0.67, "learning_rate": 4.4073766336289216e-05, "loss": 0.9139, "step": 12550 }, { "epoch": 0.67, "learning_rate": 4.4069238464772994e-05, "loss": 0.7504, "step": 12555 }, { "epoch": 0.67, "learning_rate": 4.4065615090206036e-05, "loss": 0.8008, "step": 12560 }, { "epoch": 0.67, "learning_rate": 4.406108452560339e-05, "loss": 0.7145, "step": 12565 }, { "epoch": 0.67, "learning_rate": 4.405655246533929e-05, "loss": 0.8822, "step": 12570 }, { "epoch": 0.67, "learning_rate": 4.405201890976934e-05, "loss": 0.8377, "step": 12575 }, { "epoch": 0.67, "learning_rate": 4.404748385924927e-05, "loss": 0.8367, "step": 12580 }, { "epoch": 0.67, "learning_rate": 4.4042947314134935e-05, "loss": 0.7154, "step": 12585 }, { "epoch": 0.67, "learning_rate": 4.4038409274782306e-05, "loss": 0.9397, "step": 12590 }, { "epoch": 0.67, "learning_rate": 4.403386974154747e-05, "loss": 0.9481, "step": 12595 }, { "epoch": 0.67, "learning_rate": 4.4029328714786613e-05, "loss": 1.0109, "step": 12600 }, { "epoch": 0.67, "learning_rate": 4.402478619485608e-05, "loss": 0.7807, "step": 12605 }, { "epoch": 0.67, "learning_rate": 4.402024218211229e-05, "loss": 0.9127, "step": 12610 }, { "epoch": 0.67, "learning_rate": 4.40156966769118e-05, "loss": 0.8309, "step": 12615 }, { "epoch": 0.68, "learning_rate": 4.4011149679611285e-05, "loss": 0.7362, "step": 12620 }, { "epoch": 0.68, "learning_rate": 4.400660119056753e-05, "loss": 0.8508, "step": 12625 }, { "epoch": 0.68, "learning_rate": 4.400205121013744e-05, "loss": 0.8496, "step": 12630 }, { "epoch": 0.68, "learning_rate": 4.399749973867804e-05, "loss": 0.8917, "step": 12635 }, { "epoch": 0.68, "learning_rate": 4.399294677654646e-05, "loss": 0.7266, "step": 12640 }, { "epoch": 0.68, "learning_rate": 4.398839232409997e-05, "loss": 0.8649, "step": 12645 }, { "epoch": 0.68, "learning_rate": 4.398383638169594e-05, "loss": 0.9445, "step": 12650 }, { "epoch": 0.68, "learning_rate": 4.397927894969185e-05, "loss": 0.8074, "step": 12655 }, { "epoch": 0.68, "learning_rate": 4.397472002844532e-05, "loss": 0.6829, "step": 12660 }, { "epoch": 0.68, "learning_rate": 4.3970159618314064e-05, "loss": 0.8344, "step": 12665 }, { "epoch": 0.68, "learning_rate": 4.396559771965592e-05, "loss": 0.8884, "step": 12670 }, { "epoch": 0.68, "learning_rate": 4.396103433282885e-05, "loss": 0.8908, "step": 12675 }, { "epoch": 0.68, "learning_rate": 4.395646945819094e-05, "loss": 0.7523, "step": 12680 }, { "epoch": 0.68, "learning_rate": 4.395190309610037e-05, "loss": 0.7315, "step": 12685 }, { "epoch": 0.68, "learning_rate": 4.3947335246915445e-05, "loss": 0.6234, "step": 12690 }, { "epoch": 0.68, "learning_rate": 4.3942765910994594e-05, "loss": 0.7772, "step": 12695 }, { "epoch": 0.68, "learning_rate": 4.3938195088696356e-05, "loss": 0.8661, "step": 12700 }, { "epoch": 0.68, "learning_rate": 4.393362278037938e-05, "loss": 0.7324, "step": 12705 }, { "epoch": 0.68, "learning_rate": 4.392904898640246e-05, "loss": 0.8745, "step": 12710 }, { "epoch": 0.68, "learning_rate": 4.392447370712447e-05, "loss": 0.7445, "step": 12715 }, { "epoch": 0.68, "learning_rate": 4.391989694290443e-05, "loss": 0.8657, "step": 12720 }, { "epoch": 0.68, "learning_rate": 4.3915318694101445e-05, "loss": 0.8338, "step": 12725 }, { "epoch": 0.68, "learning_rate": 4.391073896107477e-05, "loss": 0.7449, "step": 12730 }, { "epoch": 0.68, "learning_rate": 4.3906157744183766e-05, "loss": 0.7508, "step": 12735 }, { "epoch": 0.68, "learning_rate": 4.39015750437879e-05, "loss": 0.786, "step": 12740 }, { "epoch": 0.68, "learning_rate": 4.389699086024676e-05, "loss": 0.6942, "step": 12745 }, { "epoch": 0.68, "learning_rate": 4.389240519392005e-05, "loss": 1.0015, "step": 12750 }, { "epoch": 0.68, "learning_rate": 4.38878180451676e-05, "loss": 0.7224, "step": 12755 }, { "epoch": 0.68, "learning_rate": 4.3883229414349334e-05, "loss": 0.7491, "step": 12760 }, { "epoch": 0.68, "learning_rate": 4.387863930182532e-05, "loss": 0.7287, "step": 12765 }, { "epoch": 0.68, "learning_rate": 4.387404770795573e-05, "loss": 0.7377, "step": 12770 }, { "epoch": 0.68, "learning_rate": 4.386945463310085e-05, "loss": 0.8481, "step": 12775 }, { "epoch": 0.68, "learning_rate": 4.3864860077621074e-05, "loss": 0.7454, "step": 12780 }, { "epoch": 0.68, "learning_rate": 4.386026404187693e-05, "loss": 0.7473, "step": 12785 }, { "epoch": 0.68, "learning_rate": 4.385566652622906e-05, "loss": 0.6627, "step": 12790 }, { "epoch": 0.68, "learning_rate": 4.3851067531038206e-05, "loss": 0.8933, "step": 12795 }, { "epoch": 0.68, "learning_rate": 4.384646705666524e-05, "loss": 0.8745, "step": 12800 }, { "epoch": 0.69, "learning_rate": 4.384186510347114e-05, "loss": 0.6821, "step": 12805 }, { "epoch": 0.69, "learning_rate": 4.383726167181702e-05, "loss": 0.7889, "step": 12810 }, { "epoch": 0.69, "learning_rate": 4.383265676206408e-05, "loss": 0.8617, "step": 12815 }, { "epoch": 0.69, "learning_rate": 4.382805037457367e-05, "loss": 0.8249, "step": 12820 }, { "epoch": 0.69, "learning_rate": 4.3823442509707225e-05, "loss": 0.7794, "step": 12825 }, { "epoch": 0.69, "learning_rate": 4.3818833167826316e-05, "loss": 0.9807, "step": 12830 }, { "epoch": 0.69, "learning_rate": 4.381422234929262e-05, "loss": 0.7749, "step": 12835 }, { "epoch": 0.69, "learning_rate": 4.3809610054467934e-05, "loss": 0.8389, "step": 12840 }, { "epoch": 0.69, "learning_rate": 4.380499628371417e-05, "loss": 0.7548, "step": 12845 }, { "epoch": 0.69, "learning_rate": 4.380038103739335e-05, "loss": 0.7417, "step": 12850 }, { "epoch": 0.69, "learning_rate": 4.3795764315867625e-05, "loss": 0.7841, "step": 12855 }, { "epoch": 0.69, "learning_rate": 4.3791146119499246e-05, "loss": 0.8969, "step": 12860 }, { "epoch": 0.69, "learning_rate": 4.3786526448650614e-05, "loss": 0.7581, "step": 12865 }, { "epoch": 0.69, "learning_rate": 4.378190530368418e-05, "loss": 0.8292, "step": 12870 }, { "epoch": 0.69, "learning_rate": 4.377728268496257e-05, "loss": 0.7353, "step": 12875 }, { "epoch": 0.69, "learning_rate": 4.377265859284851e-05, "loss": 0.7211, "step": 12880 }, { "epoch": 0.69, "learning_rate": 4.376803302770483e-05, "loss": 0.7554, "step": 12885 }, { "epoch": 0.69, "learning_rate": 4.376340598989448e-05, "loss": 0.815, "step": 12890 }, { "epoch": 0.69, "learning_rate": 4.3758777479780545e-05, "loss": 0.8335, "step": 12895 }, { "epoch": 0.69, "learning_rate": 4.375414749772619e-05, "loss": 0.7592, "step": 12900 }, { "epoch": 0.69, "learning_rate": 4.374951604409473e-05, "loss": 0.8748, "step": 12905 }, { "epoch": 0.69, "learning_rate": 4.374488311924958e-05, "loss": 0.7128, "step": 12910 }, { "epoch": 0.69, "learning_rate": 4.3740248723554256e-05, "loss": 0.7926, "step": 12915 }, { "epoch": 0.69, "learning_rate": 4.37356128573724e-05, "loss": 0.7336, "step": 12920 }, { "epoch": 0.69, "learning_rate": 4.3730975521067805e-05, "loss": 0.679, "step": 12925 }, { "epoch": 0.69, "learning_rate": 4.372633671500431e-05, "loss": 0.7649, "step": 12930 }, { "epoch": 0.69, "learning_rate": 4.3721696439545936e-05, "loss": 0.8468, "step": 12935 }, { "epoch": 0.69, "learning_rate": 4.371705469505678e-05, "loss": 0.6405, "step": 12940 }, { "epoch": 0.69, "learning_rate": 4.371241148190107e-05, "loss": 0.703, "step": 12945 }, { "epoch": 0.69, "learning_rate": 4.370776680044313e-05, "loss": 0.7934, "step": 12950 }, { "epoch": 0.69, "learning_rate": 4.370312065104741e-05, "loss": 0.9499, "step": 12955 }, { "epoch": 0.69, "learning_rate": 4.369847303407851e-05, "loss": 0.8789, "step": 12960 }, { "epoch": 0.69, "learning_rate": 4.369382394990108e-05, "loss": 0.8279, "step": 12965 }, { "epoch": 0.69, "learning_rate": 4.368917339887993e-05, "loss": 0.8488, "step": 12970 }, { "epoch": 0.69, "learning_rate": 4.368452138137998e-05, "loss": 0.8183, "step": 12975 }, { "epoch": 0.69, "learning_rate": 4.3679867897766255e-05, "loss": 0.888, "step": 12980 }, { "epoch": 0.69, "learning_rate": 4.3675212948403896e-05, "loss": 0.7248, "step": 12985 }, { "epoch": 0.69, "learning_rate": 4.367055653365816e-05, "loss": 0.9176, "step": 12990 }, { "epoch": 0.7, "learning_rate": 4.366589865389443e-05, "loss": 0.8361, "step": 12995 }, { "epoch": 0.7, "learning_rate": 4.3661239309478186e-05, "loss": 0.7936, "step": 13000 }, { "epoch": 0.7, "learning_rate": 4.365657850077504e-05, "loss": 0.7791, "step": 13005 }, { "epoch": 0.7, "learning_rate": 4.3651916228150694e-05, "loss": 0.7412, "step": 13010 }, { "epoch": 0.7, "learning_rate": 4.3647252491971e-05, "loss": 0.8163, "step": 13015 }, { "epoch": 0.7, "learning_rate": 4.3642587292601886e-05, "loss": 0.778, "step": 13020 }, { "epoch": 0.7, "learning_rate": 4.363792063040945e-05, "loss": 0.8507, "step": 13025 }, { "epoch": 0.7, "learning_rate": 4.3633252505759825e-05, "loss": 0.7303, "step": 13030 }, { "epoch": 0.7, "learning_rate": 4.362858291901933e-05, "loss": 0.8076, "step": 13035 }, { "epoch": 0.7, "learning_rate": 4.362391187055438e-05, "loss": 0.8979, "step": 13040 }, { "epoch": 0.7, "learning_rate": 4.361923936073148e-05, "loss": 0.7571, "step": 13045 }, { "epoch": 0.7, "learning_rate": 4.361456538991727e-05, "loss": 0.6578, "step": 13050 }, { "epoch": 0.7, "learning_rate": 4.36098899584785e-05, "loss": 0.6153, "step": 13055 }, { "epoch": 0.7, "learning_rate": 4.3605213066782045e-05, "loss": 0.7902, "step": 13060 }, { "epoch": 0.7, "learning_rate": 4.360053471519489e-05, "loss": 0.7363, "step": 13065 }, { "epoch": 0.7, "learning_rate": 4.35958549040841e-05, "loss": 0.8214, "step": 13070 }, { "epoch": 0.7, "learning_rate": 4.359117363381691e-05, "loss": 0.7418, "step": 13075 }, { "epoch": 0.7, "learning_rate": 4.358649090476065e-05, "loss": 0.7491, "step": 13080 }, { "epoch": 0.7, "learning_rate": 4.358180671728274e-05, "loss": 0.7573, "step": 13085 }, { "epoch": 0.7, "learning_rate": 4.357712107175073e-05, "loss": 0.6123, "step": 13090 }, { "epoch": 0.7, "learning_rate": 4.357243396853231e-05, "loss": 0.6387, "step": 13095 }, { "epoch": 0.7, "learning_rate": 4.356774540799524e-05, "loss": 0.8532, "step": 13100 }, { "epoch": 0.7, "learning_rate": 4.356305539050744e-05, "loss": 0.7031, "step": 13105 }, { "epoch": 0.7, "learning_rate": 4.3558363916436894e-05, "loss": 0.6158, "step": 13110 }, { "epoch": 0.7, "learning_rate": 4.355367098615174e-05, "loss": 0.7412, "step": 13115 }, { "epoch": 0.7, "learning_rate": 4.354897660002022e-05, "loss": 0.7006, "step": 13120 }, { "epoch": 0.7, "learning_rate": 4.3544280758410676e-05, "loss": 0.7137, "step": 13125 }, { "epoch": 0.7, "learning_rate": 4.353958346169159e-05, "loss": 0.854, "step": 13130 }, { "epoch": 0.7, "learning_rate": 4.353488471023153e-05, "loss": 0.8121, "step": 13135 }, { "epoch": 0.7, "learning_rate": 4.3530184504399204e-05, "loss": 0.5475, "step": 13140 }, { "epoch": 0.7, "learning_rate": 4.352548284456341e-05, "loss": 0.7884, "step": 13145 }, { "epoch": 0.7, "learning_rate": 4.3520779731093084e-05, "loss": 0.6686, "step": 13150 }, { "epoch": 0.7, "learning_rate": 4.351607516435725e-05, "loss": 0.8095, "step": 13155 }, { "epoch": 0.7, "learning_rate": 4.3511369144725066e-05, "loss": 0.6401, "step": 13160 }, { "epoch": 0.7, "learning_rate": 4.350666167256581e-05, "loss": 0.8313, "step": 13165 }, { "epoch": 0.7, "learning_rate": 4.350195274824885e-05, "loss": 0.7963, "step": 13170 }, { "epoch": 0.7, "learning_rate": 4.349724237214368e-05, "loss": 0.6575, "step": 13175 }, { "epoch": 0.71, "learning_rate": 4.349253054461991e-05, "loss": 0.8806, "step": 13180 }, { "epoch": 0.71, "learning_rate": 4.348781726604726e-05, "loss": 0.7534, "step": 13185 }, { "epoch": 0.71, "learning_rate": 4.3483102536795566e-05, "loss": 0.8731, "step": 13190 }, { "epoch": 0.71, "learning_rate": 4.3478386357234786e-05, "loss": 0.7056, "step": 13195 }, { "epoch": 0.71, "learning_rate": 4.3473668727734966e-05, "loss": 0.9734, "step": 13200 }, { "epoch": 0.71, "learning_rate": 4.34689496486663e-05, "loss": 0.833, "step": 13205 }, { "epoch": 0.71, "learning_rate": 4.3464229120399075e-05, "loss": 0.7495, "step": 13210 }, { "epoch": 0.71, "learning_rate": 4.345950714330369e-05, "loss": 0.7892, "step": 13215 }, { "epoch": 0.71, "learning_rate": 4.3454783717750664e-05, "loss": 0.7466, "step": 13220 }, { "epoch": 0.71, "learning_rate": 4.345005884411063e-05, "loss": 0.7017, "step": 13225 }, { "epoch": 0.71, "learning_rate": 4.3445332522754335e-05, "loss": 0.907, "step": 13230 }, { "epoch": 0.71, "learning_rate": 4.344060475405264e-05, "loss": 0.6775, "step": 13235 }, { "epoch": 0.71, "learning_rate": 4.343587553837651e-05, "loss": 0.7486, "step": 13240 }, { "epoch": 0.71, "learning_rate": 4.343114487609704e-05, "loss": 0.7794, "step": 13245 }, { "epoch": 0.71, "learning_rate": 4.342641276758543e-05, "loss": 0.7655, "step": 13250 }, { "epoch": 0.71, "learning_rate": 4.3421679213212986e-05, "loss": 0.8388, "step": 13255 }, { "epoch": 0.71, "learning_rate": 4.341694421335114e-05, "loss": 0.7333, "step": 13260 }, { "epoch": 0.71, "learning_rate": 4.341220776837143e-05, "loss": 0.9167, "step": 13265 }, { "epoch": 0.71, "learning_rate": 4.34074698786455e-05, "loss": 0.8251, "step": 13270 }, { "epoch": 0.71, "learning_rate": 4.3402730544545135e-05, "loss": 0.7719, "step": 13275 }, { "epoch": 0.71, "learning_rate": 4.3397989766442204e-05, "loss": 0.9258, "step": 13280 }, { "epoch": 0.71, "learning_rate": 4.3393247544708706e-05, "loss": 0.9782, "step": 13285 }, { "epoch": 0.71, "learning_rate": 4.338850387971674e-05, "loss": 0.7782, "step": 13290 }, { "epoch": 0.71, "learning_rate": 4.3383758771838534e-05, "loss": 0.8658, "step": 13295 }, { "epoch": 0.71, "learning_rate": 4.3379012221446415e-05, "loss": 0.6974, "step": 13300 }, { "epoch": 0.71, "learning_rate": 4.337426422891283e-05, "loss": 0.7487, "step": 13305 }, { "epoch": 0.71, "learning_rate": 4.336951479461035e-05, "loss": 0.7499, "step": 13310 }, { "epoch": 0.71, "learning_rate": 4.3364763918911635e-05, "loss": 0.8276, "step": 13315 }, { "epoch": 0.71, "learning_rate": 4.336001160218947e-05, "loss": 0.8311, "step": 13320 }, { "epoch": 0.71, "learning_rate": 4.335525784481676e-05, "loss": 0.7503, "step": 13325 }, { "epoch": 0.71, "learning_rate": 4.335050264716652e-05, "loss": 0.74, "step": 13330 }, { "epoch": 0.71, "learning_rate": 4.3345746009611865e-05, "loss": 0.6921, "step": 13335 }, { "epoch": 0.71, "learning_rate": 4.334098793252604e-05, "loss": 0.8065, "step": 13340 }, { "epoch": 0.71, "learning_rate": 4.333622841628239e-05, "loss": 0.8301, "step": 13345 }, { "epoch": 0.71, "learning_rate": 4.3331467461254387e-05, "loss": 0.7791, "step": 13350 }, { "epoch": 0.71, "learning_rate": 4.3326705067815606e-05, "loss": 0.6827, "step": 13355 }, { "epoch": 0.71, "learning_rate": 4.3321941236339724e-05, "loss": 0.658, "step": 13360 }, { "epoch": 0.71, "learning_rate": 4.331717596720056e-05, "loss": 0.7526, "step": 13365 }, { "epoch": 0.72, "learning_rate": 4.331240926077202e-05, "loss": 0.8567, "step": 13370 }, { "epoch": 0.72, "learning_rate": 4.3307641117428146e-05, "loss": 0.8012, "step": 13375 }, { "epoch": 0.72, "learning_rate": 4.3302871537543055e-05, "loss": 0.7934, "step": 13380 }, { "epoch": 0.72, "learning_rate": 4.329810052149101e-05, "loss": 0.5949, "step": 13385 }, { "epoch": 0.72, "learning_rate": 4.329332806964639e-05, "loss": 0.6297, "step": 13390 }, { "epoch": 0.72, "learning_rate": 4.328855418238366e-05, "loss": 0.8759, "step": 13395 }, { "epoch": 0.72, "learning_rate": 4.328377886007742e-05, "loss": 0.6256, "step": 13400 }, { "epoch": 0.72, "learning_rate": 4.3279002103102364e-05, "loss": 0.8063, "step": 13405 }, { "epoch": 0.72, "learning_rate": 4.327422391183331e-05, "loss": 0.7435, "step": 13410 }, { "epoch": 0.72, "learning_rate": 4.3269444286645195e-05, "loss": 0.8356, "step": 13415 }, { "epoch": 0.72, "learning_rate": 4.326466322791306e-05, "loss": 0.8603, "step": 13420 }, { "epoch": 0.72, "learning_rate": 4.325988073601205e-05, "loss": 0.7352, "step": 13425 }, { "epoch": 0.72, "learning_rate": 4.325509681131744e-05, "loss": 0.7229, "step": 13430 }, { "epoch": 0.72, "learning_rate": 4.3250311454204604e-05, "loss": 0.7984, "step": 13435 }, { "epoch": 0.72, "learning_rate": 4.324552466504904e-05, "loss": 0.854, "step": 13440 }, { "epoch": 0.72, "learning_rate": 4.324073644422635e-05, "loss": 0.9319, "step": 13445 }, { "epoch": 0.72, "learning_rate": 4.323594679211224e-05, "loss": 0.7814, "step": 13450 }, { "epoch": 0.72, "learning_rate": 4.323115570908255e-05, "loss": 0.7654, "step": 13455 }, { "epoch": 0.72, "learning_rate": 4.322636319551322e-05, "loss": 0.7388, "step": 13460 }, { "epoch": 0.72, "learning_rate": 4.3221569251780294e-05, "loss": 0.9102, "step": 13465 }, { "epoch": 0.72, "learning_rate": 4.321677387825995e-05, "loss": 0.7954, "step": 13470 }, { "epoch": 0.72, "learning_rate": 4.321197707532846e-05, "loss": 0.7261, "step": 13475 }, { "epoch": 0.72, "learning_rate": 4.3207178843362206e-05, "loss": 0.8319, "step": 13480 }, { "epoch": 0.72, "learning_rate": 4.3202379182737696e-05, "loss": 0.812, "step": 13485 }, { "epoch": 0.72, "learning_rate": 4.3197578093831546e-05, "loss": 0.6999, "step": 13490 }, { "epoch": 0.72, "learning_rate": 4.319277557702048e-05, "loss": 0.7062, "step": 13495 }, { "epoch": 0.72, "learning_rate": 4.3187971632681334e-05, "loss": 0.7148, "step": 13500 }, { "epoch": 0.72, "learning_rate": 4.318316626119106e-05, "loss": 0.7924, "step": 13505 }, { "epoch": 0.72, "learning_rate": 4.317835946292673e-05, "loss": 0.8971, "step": 13510 }, { "epoch": 0.72, "learning_rate": 4.31735512382655e-05, "loss": 0.8842, "step": 13515 }, { "epoch": 0.72, "learning_rate": 4.316874158758465e-05, "loss": 0.7511, "step": 13520 }, { "epoch": 0.72, "learning_rate": 4.316393051126161e-05, "loss": 0.826, "step": 13525 }, { "epoch": 0.72, "learning_rate": 4.315911800967386e-05, "loss": 0.7259, "step": 13530 }, { "epoch": 0.72, "learning_rate": 4.315430408319903e-05, "loss": 0.7994, "step": 13535 }, { "epoch": 0.72, "learning_rate": 4.314948873221486e-05, "loss": 0.7357, "step": 13540 }, { "epoch": 0.72, "learning_rate": 4.3144671957099195e-05, "loss": 0.8327, "step": 13545 }, { "epoch": 0.72, "learning_rate": 4.3139853758229974e-05, "loss": 0.7013, "step": 13550 }, { "epoch": 0.73, "learning_rate": 4.3135034135985284e-05, "loss": 0.7645, "step": 13555 }, { "epoch": 0.73, "learning_rate": 4.31302130907433e-05, "loss": 0.7555, "step": 13560 }, { "epoch": 0.73, "learning_rate": 4.312539062288232e-05, "loss": 0.6446, "step": 13565 }, { "epoch": 0.73, "learning_rate": 4.3120566732780734e-05, "loss": 0.9196, "step": 13570 }, { "epoch": 0.73, "learning_rate": 4.311574142081706e-05, "loss": 0.7571, "step": 13575 }, { "epoch": 0.73, "learning_rate": 4.311091468736993e-05, "loss": 0.8022, "step": 13580 }, { "epoch": 0.73, "learning_rate": 4.310608653281807e-05, "loss": 0.7824, "step": 13585 }, { "epoch": 0.73, "learning_rate": 4.310125695754036e-05, "loss": 0.7804, "step": 13590 }, { "epoch": 0.73, "learning_rate": 4.3096425961915726e-05, "loss": 0.9084, "step": 13595 }, { "epoch": 0.73, "learning_rate": 4.309159354632326e-05, "loss": 0.7308, "step": 13600 }, { "epoch": 0.73, "learning_rate": 4.308675971114215e-05, "loss": 0.8479, "step": 13605 }, { "epoch": 0.73, "learning_rate": 4.3081924456751665e-05, "loss": 0.7876, "step": 13610 }, { "epoch": 0.73, "learning_rate": 4.307708778353124e-05, "loss": 0.7154, "step": 13615 }, { "epoch": 0.73, "learning_rate": 4.307224969186038e-05, "loss": 0.7687, "step": 13620 }, { "epoch": 0.73, "learning_rate": 4.306741018211872e-05, "loss": 0.8058, "step": 13625 }, { "epoch": 0.73, "learning_rate": 4.3062569254685994e-05, "loss": 0.8055, "step": 13630 }, { "epoch": 0.73, "learning_rate": 4.3057726909942054e-05, "loss": 0.7865, "step": 13635 }, { "epoch": 0.73, "learning_rate": 4.305288314826688e-05, "loss": 0.8011, "step": 13640 }, { "epoch": 0.73, "learning_rate": 4.3048037970040515e-05, "loss": 0.6767, "step": 13645 }, { "epoch": 0.73, "learning_rate": 4.304319137564318e-05, "loss": 0.9095, "step": 13650 }, { "epoch": 0.73, "learning_rate": 4.303834336545514e-05, "loss": 0.6939, "step": 13655 }, { "epoch": 0.73, "learning_rate": 4.3033493939856816e-05, "loss": 0.7084, "step": 13660 }, { "epoch": 0.73, "learning_rate": 4.302864309922874e-05, "loss": 0.8821, "step": 13665 }, { "epoch": 0.73, "learning_rate": 4.3023790843951515e-05, "loss": 0.7774, "step": 13670 }, { "epoch": 0.73, "learning_rate": 4.30189371744059e-05, "loss": 0.8113, "step": 13675 }, { "epoch": 0.73, "learning_rate": 4.301408209097274e-05, "loss": 0.7243, "step": 13680 }, { "epoch": 0.73, "learning_rate": 4.300922559403301e-05, "loss": 0.7637, "step": 13685 }, { "epoch": 0.73, "learning_rate": 4.300436768396776e-05, "loss": 0.8162, "step": 13690 }, { "epoch": 0.73, "learning_rate": 4.299950836115819e-05, "loss": 0.7728, "step": 13695 }, { "epoch": 0.73, "learning_rate": 4.29946476259856e-05, "loss": 0.7161, "step": 13700 }, { "epoch": 0.73, "learning_rate": 4.298978547883138e-05, "loss": 0.7888, "step": 13705 }, { "epoch": 0.73, "learning_rate": 4.298492192007707e-05, "loss": 0.8428, "step": 13710 }, { "epoch": 0.73, "learning_rate": 4.298005695010427e-05, "loss": 0.7264, "step": 13715 }, { "epoch": 0.73, "learning_rate": 4.297519056929474e-05, "loss": 0.6785, "step": 13720 }, { "epoch": 0.73, "learning_rate": 4.297032277803032e-05, "loss": 0.7387, "step": 13725 }, { "epoch": 0.73, "learning_rate": 4.296545357669297e-05, "loss": 0.8192, "step": 13730 }, { "epoch": 0.73, "learning_rate": 4.296058296566476e-05, "loss": 0.682, "step": 13735 }, { "epoch": 0.74, "learning_rate": 4.2955710945327875e-05, "loss": 0.7063, "step": 13740 }, { "epoch": 0.74, "learning_rate": 4.2950837516064605e-05, "loss": 0.8201, "step": 13745 }, { "epoch": 0.74, "learning_rate": 4.2945962678257344e-05, "loss": 0.9147, "step": 13750 }, { "epoch": 0.74, "learning_rate": 4.294108643228862e-05, "loss": 0.8736, "step": 13755 }, { "epoch": 0.74, "learning_rate": 4.293620877854104e-05, "loss": 0.8974, "step": 13760 }, { "epoch": 0.74, "learning_rate": 4.293132971739736e-05, "loss": 0.7415, "step": 13765 }, { "epoch": 0.74, "learning_rate": 4.29264492492404e-05, "loss": 0.8989, "step": 13770 }, { "epoch": 0.74, "learning_rate": 4.292156737445312e-05, "loss": 0.7945, "step": 13775 }, { "epoch": 0.74, "learning_rate": 4.2916684093418605e-05, "loss": 0.6267, "step": 13780 }, { "epoch": 0.74, "learning_rate": 4.291179940652e-05, "loss": 0.8909, "step": 13785 }, { "epoch": 0.74, "learning_rate": 4.290691331414061e-05, "loss": 0.7708, "step": 13790 }, { "epoch": 0.74, "learning_rate": 4.2902025816663826e-05, "loss": 0.9017, "step": 13795 }, { "epoch": 0.74, "learning_rate": 4.289713691447316e-05, "loss": 0.7676, "step": 13800 }, { "epoch": 0.74, "learning_rate": 4.2892246607952214e-05, "loss": 0.8595, "step": 13805 }, { "epoch": 0.74, "learning_rate": 4.288735489748473e-05, "loss": 0.7769, "step": 13810 }, { "epoch": 0.74, "learning_rate": 4.2882461783454534e-05, "loss": 0.8757, "step": 13815 }, { "epoch": 0.74, "learning_rate": 4.287756726624557e-05, "loss": 0.7026, "step": 13820 }, { "epoch": 0.74, "learning_rate": 4.287267134624191e-05, "loss": 0.7234, "step": 13825 }, { "epoch": 0.74, "learning_rate": 4.2867774023827706e-05, "loss": 0.6693, "step": 13830 }, { "epoch": 0.74, "learning_rate": 4.286287529938724e-05, "loss": 0.7171, "step": 13835 }, { "epoch": 0.74, "learning_rate": 4.2857975173304906e-05, "loss": 0.6785, "step": 13840 }, { "epoch": 0.74, "learning_rate": 4.28530736459652e-05, "loss": 0.7787, "step": 13845 }, { "epoch": 0.74, "learning_rate": 4.284817071775271e-05, "loss": 0.7244, "step": 13850 }, { "epoch": 0.74, "learning_rate": 4.284326638905218e-05, "loss": 0.8608, "step": 13855 }, { "epoch": 0.74, "learning_rate": 4.283836066024841e-05, "loss": 0.7805, "step": 13860 }, { "epoch": 0.74, "learning_rate": 4.283345353172636e-05, "loss": 0.8232, "step": 13865 }, { "epoch": 0.74, "learning_rate": 4.282854500387107e-05, "loss": 0.7503, "step": 13870 }, { "epoch": 0.74, "learning_rate": 4.282363507706769e-05, "loss": 0.78, "step": 13875 }, { "epoch": 0.74, "learning_rate": 4.281872375170148e-05, "loss": 0.7393, "step": 13880 }, { "epoch": 0.74, "learning_rate": 4.281381102815784e-05, "loss": 0.674, "step": 13885 }, { "epoch": 0.74, "learning_rate": 4.280889690682223e-05, "loss": 0.806, "step": 13890 }, { "epoch": 0.74, "learning_rate": 4.2803981388080254e-05, "loss": 0.7558, "step": 13895 }, { "epoch": 0.74, "learning_rate": 4.279906447231763e-05, "loss": 0.8335, "step": 13900 }, { "epoch": 0.74, "learning_rate": 4.279414615992014e-05, "loss": 0.7716, "step": 13905 }, { "epoch": 0.74, "learning_rate": 4.278922645127375e-05, "loss": 0.9745, "step": 13910 }, { "epoch": 0.74, "learning_rate": 4.2784305346764464e-05, "loss": 0.8848, "step": 13915 }, { "epoch": 0.74, "learning_rate": 4.2779382846778434e-05, "loss": 0.8083, "step": 13920 }, { "epoch": 0.74, "learning_rate": 4.277445895170191e-05, "loss": 0.7511, "step": 13925 }, { "epoch": 0.75, "learning_rate": 4.276953366192126e-05, "loss": 0.9569, "step": 13930 }, { "epoch": 0.75, "learning_rate": 4.276460697782295e-05, "loss": 0.832, "step": 13935 }, { "epoch": 0.75, "learning_rate": 4.275967889979356e-05, "loss": 0.7719, "step": 13940 }, { "epoch": 0.75, "learning_rate": 4.275474942821978e-05, "loss": 0.8437, "step": 13945 }, { "epoch": 0.75, "learning_rate": 4.274981856348842e-05, "loss": 0.8975, "step": 13950 }, { "epoch": 0.75, "learning_rate": 4.2744886305986376e-05, "loss": 0.77, "step": 13955 }, { "epoch": 0.75, "learning_rate": 4.273995265610068e-05, "loss": 0.8018, "step": 13960 }, { "epoch": 0.75, "learning_rate": 4.2735017614218444e-05, "loss": 0.6188, "step": 13965 }, { "epoch": 0.75, "learning_rate": 4.273008118072691e-05, "loss": 0.8172, "step": 13970 }, { "epoch": 0.75, "learning_rate": 4.272514335601343e-05, "loss": 0.8165, "step": 13975 }, { "epoch": 0.75, "learning_rate": 4.272020414046546e-05, "loss": 0.7788, "step": 13980 }, { "epoch": 0.75, "learning_rate": 4.2715263534470545e-05, "loss": 0.8823, "step": 13985 }, { "epoch": 0.75, "learning_rate": 4.271032153841638e-05, "loss": 0.8042, "step": 13990 }, { "epoch": 0.75, "learning_rate": 4.2705378152690746e-05, "loss": 0.7676, "step": 13995 }, { "epoch": 0.75, "learning_rate": 4.2700433377681514e-05, "loss": 0.6615, "step": 14000 }, { "epoch": 0.75, "learning_rate": 4.2695487213776705e-05, "loss": 0.8033, "step": 14005 }, { "epoch": 0.75, "learning_rate": 4.269053966136443e-05, "loss": 0.7983, "step": 14010 }, { "epoch": 0.75, "learning_rate": 4.268559072083289e-05, "loss": 0.7349, "step": 14015 }, { "epoch": 0.75, "learning_rate": 4.2680640392570425e-05, "loss": 0.9242, "step": 14020 }, { "epoch": 0.75, "learning_rate": 4.267568867696548e-05, "loss": 0.891, "step": 14025 }, { "epoch": 0.75, "learning_rate": 4.267073557440657e-05, "loss": 0.8082, "step": 14030 }, { "epoch": 0.75, "learning_rate": 4.2665781085282376e-05, "loss": 0.8487, "step": 14035 }, { "epoch": 0.75, "learning_rate": 4.266082520998165e-05, "loss": 0.8148, "step": 14040 }, { "epoch": 0.75, "learning_rate": 4.265586794889327e-05, "loss": 0.6315, "step": 14045 }, { "epoch": 0.75, "learning_rate": 4.26509093024062e-05, "loss": 0.6232, "step": 14050 }, { "epoch": 0.75, "learning_rate": 4.264594927090955e-05, "loss": 0.7678, "step": 14055 }, { "epoch": 0.75, "learning_rate": 4.26409878547925e-05, "loss": 0.7228, "step": 14060 }, { "epoch": 0.75, "learning_rate": 4.263602505444437e-05, "loss": 0.6994, "step": 14065 }, { "epoch": 0.75, "learning_rate": 4.263106087025456e-05, "loss": 0.7507, "step": 14070 }, { "epoch": 0.75, "learning_rate": 4.262609530261262e-05, "loss": 0.6821, "step": 14075 }, { "epoch": 0.75, "learning_rate": 4.262112835190815e-05, "loss": 0.6878, "step": 14080 }, { "epoch": 0.75, "learning_rate": 4.26161600185309e-05, "loss": 0.6651, "step": 14085 }, { "epoch": 0.75, "learning_rate": 4.261119030287074e-05, "loss": 0.9296, "step": 14090 }, { "epoch": 0.75, "learning_rate": 4.2606219205317606e-05, "loss": 0.6205, "step": 14095 }, { "epoch": 0.75, "learning_rate": 4.260124672626156e-05, "loss": 0.646, "step": 14100 }, { "epoch": 0.75, "learning_rate": 4.25962728660928e-05, "loss": 0.7434, "step": 14105 }, { "epoch": 0.75, "learning_rate": 4.259129762520159e-05, "loss": 0.8019, "step": 14110 }, { "epoch": 0.76, "learning_rate": 4.258632100397831e-05, "loss": 0.6602, "step": 14115 }, { "epoch": 0.76, "learning_rate": 4.258134300281349e-05, "loss": 0.6979, "step": 14120 }, { "epoch": 0.76, "learning_rate": 4.257636362209772e-05, "loss": 0.7679, "step": 14125 }, { "epoch": 0.76, "learning_rate": 4.257138286222172e-05, "loss": 0.6943, "step": 14130 }, { "epoch": 0.76, "learning_rate": 4.25664007235763e-05, "loss": 0.6629, "step": 14135 }, { "epoch": 0.76, "learning_rate": 4.2561417206552415e-05, "loss": 0.8641, "step": 14140 }, { "epoch": 0.76, "learning_rate": 4.2556432311541095e-05, "loss": 0.7435, "step": 14145 }, { "epoch": 0.76, "learning_rate": 4.255144603893348e-05, "loss": 0.7812, "step": 14150 }, { "epoch": 0.76, "learning_rate": 4.2546458389120846e-05, "loss": 0.7806, "step": 14155 }, { "epoch": 0.76, "learning_rate": 4.254146936249455e-05, "loss": 0.7192, "step": 14160 }, { "epoch": 0.76, "learning_rate": 4.2536478959446046e-05, "loss": 0.8489, "step": 14165 }, { "epoch": 0.76, "learning_rate": 4.2531487180366934e-05, "loss": 0.9311, "step": 14170 }, { "epoch": 0.76, "learning_rate": 4.25264940256489e-05, "loss": 0.6379, "step": 14175 }, { "epoch": 0.76, "learning_rate": 4.252149949568374e-05, "loss": 0.8005, "step": 14180 }, { "epoch": 0.76, "learning_rate": 4.251650359086336e-05, "loss": 0.8579, "step": 14185 }, { "epoch": 0.76, "learning_rate": 4.251150631157977e-05, "loss": 0.7688, "step": 14190 }, { "epoch": 0.76, "learning_rate": 4.250650765822509e-05, "loss": 0.8505, "step": 14195 }, { "epoch": 0.76, "learning_rate": 4.250150763119155e-05, "loss": 0.7682, "step": 14200 }, { "epoch": 0.76, "learning_rate": 4.249650623087148e-05, "loss": 0.6978, "step": 14205 }, { "epoch": 0.76, "learning_rate": 4.2491503457657335e-05, "loss": 0.7054, "step": 14210 }, { "epoch": 0.76, "learning_rate": 4.248649931194165e-05, "loss": 0.8373, "step": 14215 }, { "epoch": 0.76, "learning_rate": 4.24814937941171e-05, "loss": 0.784, "step": 14220 }, { "epoch": 0.76, "learning_rate": 4.247648690457645e-05, "loss": 0.7168, "step": 14225 }, { "epoch": 0.76, "learning_rate": 4.247147864371256e-05, "loss": 0.7629, "step": 14230 }, { "epoch": 0.76, "learning_rate": 4.246646901191843e-05, "loss": 0.7039, "step": 14235 }, { "epoch": 0.76, "learning_rate": 4.246145800958714e-05, "loss": 0.8141, "step": 14240 }, { "epoch": 0.76, "learning_rate": 4.245644563711189e-05, "loss": 0.9522, "step": 14245 }, { "epoch": 0.76, "learning_rate": 4.245143189488598e-05, "loss": 0.8157, "step": 14250 }, { "epoch": 0.76, "learning_rate": 4.244641678330282e-05, "loss": 0.8473, "step": 14255 }, { "epoch": 0.76, "learning_rate": 4.2441400302755945e-05, "loss": 0.711, "step": 14260 }, { "epoch": 0.76, "learning_rate": 4.243638245363897e-05, "loss": 0.7445, "step": 14265 }, { "epoch": 0.76, "learning_rate": 4.2431363236345625e-05, "loss": 0.7772, "step": 14270 }, { "epoch": 0.76, "learning_rate": 4.242634265126977e-05, "loss": 0.7294, "step": 14275 }, { "epoch": 0.76, "learning_rate": 4.242132069880533e-05, "loss": 0.6996, "step": 14280 }, { "epoch": 0.76, "learning_rate": 4.2416297379346376e-05, "loss": 0.6972, "step": 14285 }, { "epoch": 0.76, "learning_rate": 4.2411272693287064e-05, "loss": 0.7665, "step": 14290 }, { "epoch": 0.76, "learning_rate": 4.240624664102167e-05, "loss": 0.739, "step": 14295 }, { "epoch": 0.77, "learning_rate": 4.240121922294459e-05, "loss": 0.7974, "step": 14300 }, { "epoch": 0.77, "learning_rate": 4.239619043945027e-05, "loss": 0.9701, "step": 14305 }, { "epoch": 0.77, "learning_rate": 4.239116029093333e-05, "loss": 0.9068, "step": 14310 }, { "epoch": 0.77, "learning_rate": 4.2386128777788465e-05, "loss": 0.947, "step": 14315 }, { "epoch": 0.77, "learning_rate": 4.238109590041047e-05, "loss": 0.6394, "step": 14320 }, { "epoch": 0.77, "learning_rate": 4.237606165919428e-05, "loss": 0.6713, "step": 14325 }, { "epoch": 0.77, "learning_rate": 4.2371026054534904e-05, "loss": 0.6775, "step": 14330 }, { "epoch": 0.77, "learning_rate": 4.2365989086827454e-05, "loss": 0.8097, "step": 14335 }, { "epoch": 0.77, "learning_rate": 4.236095075646719e-05, "loss": 0.8135, "step": 14340 }, { "epoch": 0.77, "learning_rate": 4.235591106384944e-05, "loss": 0.7608, "step": 14345 }, { "epoch": 0.77, "learning_rate": 4.2350870009369654e-05, "loss": 0.8754, "step": 14350 }, { "epoch": 0.77, "learning_rate": 4.234582759342339e-05, "loss": 0.8942, "step": 14355 }, { "epoch": 0.77, "learning_rate": 4.234078381640631e-05, "loss": 0.7907, "step": 14360 }, { "epoch": 0.77, "learning_rate": 4.233573867871418e-05, "loss": 0.7991, "step": 14365 }, { "epoch": 0.77, "learning_rate": 4.233069218074287e-05, "loss": 0.7618, "step": 14370 }, { "epoch": 0.77, "learning_rate": 4.232564432288838e-05, "loss": 0.7194, "step": 14375 }, { "epoch": 0.77, "learning_rate": 4.232059510554678e-05, "loss": 0.6555, "step": 14380 }, { "epoch": 0.77, "learning_rate": 4.231554452911427e-05, "loss": 0.6782, "step": 14385 }, { "epoch": 0.77, "learning_rate": 4.231049259398716e-05, "loss": 0.7596, "step": 14390 }, { "epoch": 0.77, "learning_rate": 4.230543930056186e-05, "loss": 0.76, "step": 14395 }, { "epoch": 0.77, "learning_rate": 4.230038464923488e-05, "loss": 0.6658, "step": 14400 }, { "epoch": 0.77, "learning_rate": 4.2295328640402836e-05, "loss": 0.8056, "step": 14405 }, { "epoch": 0.77, "learning_rate": 4.2290271274462464e-05, "loss": 0.6643, "step": 14410 }, { "epoch": 0.77, "learning_rate": 4.2285212551810604e-05, "loss": 0.7615, "step": 14415 }, { "epoch": 0.77, "learning_rate": 4.2280152472844194e-05, "loss": 0.8005, "step": 14420 }, { "epoch": 0.77, "learning_rate": 4.2275091037960276e-05, "loss": 0.7706, "step": 14425 }, { "epoch": 0.77, "learning_rate": 4.2270028247556e-05, "loss": 0.7255, "step": 14430 }, { "epoch": 0.77, "learning_rate": 4.2264964102028646e-05, "loss": 0.8158, "step": 14435 }, { "epoch": 0.77, "learning_rate": 4.2259898601775567e-05, "loss": 0.7915, "step": 14440 }, { "epoch": 0.77, "learning_rate": 4.225483174719424e-05, "loss": 1.0766, "step": 14445 }, { "epoch": 0.77, "learning_rate": 4.224976353868224e-05, "loss": 0.8752, "step": 14450 }, { "epoch": 0.77, "learning_rate": 4.224469397663726e-05, "loss": 0.7725, "step": 14455 }, { "epoch": 0.77, "learning_rate": 4.223962306145709e-05, "loss": 0.7454, "step": 14460 }, { "epoch": 0.77, "learning_rate": 4.223455079353963e-05, "loss": 0.7165, "step": 14465 }, { "epoch": 0.77, "learning_rate": 4.222947717328287e-05, "loss": 0.7469, "step": 14470 }, { "epoch": 0.77, "learning_rate": 4.2224402201084945e-05, "loss": 0.8593, "step": 14475 }, { "epoch": 0.77, "learning_rate": 4.2219325877344054e-05, "loss": 0.7903, "step": 14480 }, { "epoch": 0.77, "learning_rate": 4.2214248202458524e-05, "loss": 0.8492, "step": 14485 }, { "epoch": 0.78, "learning_rate": 4.2209169176826785e-05, "loss": 0.6709, "step": 14490 }, { "epoch": 0.78, "learning_rate": 4.220408880084737e-05, "loss": 0.7305, "step": 14495 }, { "epoch": 0.78, "learning_rate": 4.219900707491892e-05, "loss": 0.6952, "step": 14500 }, { "epoch": 0.78, "learning_rate": 4.219392399944018e-05, "loss": 0.7016, "step": 14505 }, { "epoch": 0.78, "learning_rate": 4.2188839574810014e-05, "loss": 0.9051, "step": 14510 }, { "epoch": 0.78, "learning_rate": 4.2183753801427364e-05, "loss": 0.8284, "step": 14515 }, { "epoch": 0.78, "learning_rate": 4.217866667969129e-05, "loss": 0.7009, "step": 14520 }, { "epoch": 0.78, "learning_rate": 4.217357821000099e-05, "loss": 0.7906, "step": 14525 }, { "epoch": 0.78, "learning_rate": 4.2168488392755715e-05, "loss": 0.7416, "step": 14530 }, { "epoch": 0.78, "learning_rate": 4.216339722835486e-05, "loss": 0.8503, "step": 14535 }, { "epoch": 0.78, "learning_rate": 4.215830471719789e-05, "loss": 0.8094, "step": 14540 }, { "epoch": 0.78, "learning_rate": 4.215321085968443e-05, "loss": 0.7569, "step": 14545 }, { "epoch": 0.78, "learning_rate": 4.214811565621416e-05, "loss": 0.7285, "step": 14550 }, { "epoch": 0.78, "learning_rate": 4.214301910718688e-05, "loss": 0.7024, "step": 14555 }, { "epoch": 0.78, "learning_rate": 4.213792121300252e-05, "loss": 0.6515, "step": 14560 }, { "epoch": 0.78, "learning_rate": 4.2132821974061064e-05, "loss": 0.7869, "step": 14565 }, { "epoch": 0.78, "learning_rate": 4.212772139076266e-05, "loss": 0.7572, "step": 14570 }, { "epoch": 0.78, "learning_rate": 4.2122619463507516e-05, "loss": 0.8004, "step": 14575 }, { "epoch": 0.78, "learning_rate": 4.2117516192695986e-05, "loss": 0.8827, "step": 14580 }, { "epoch": 0.78, "learning_rate": 4.211241157872848e-05, "loss": 0.8307, "step": 14585 }, { "epoch": 0.78, "learning_rate": 4.210730562200557e-05, "loss": 0.7506, "step": 14590 }, { "epoch": 0.78, "learning_rate": 4.210219832292787e-05, "loss": 0.7545, "step": 14595 }, { "epoch": 0.78, "learning_rate": 4.209708968189615e-05, "loss": 0.7133, "step": 14600 }, { "epoch": 0.78, "learning_rate": 4.209197969931128e-05, "loss": 0.8743, "step": 14605 }, { "epoch": 0.78, "learning_rate": 4.208686837557421e-05, "loss": 0.7308, "step": 14610 }, { "epoch": 0.78, "learning_rate": 4.2081755711086014e-05, "loss": 0.8478, "step": 14615 }, { "epoch": 0.78, "learning_rate": 4.207664170624786e-05, "loss": 0.9201, "step": 14620 }, { "epoch": 0.78, "learning_rate": 4.2071526361461034e-05, "loss": 0.7675, "step": 14625 }, { "epoch": 0.78, "learning_rate": 4.206640967712691e-05, "loss": 0.6525, "step": 14630 }, { "epoch": 0.78, "learning_rate": 4.2061291653646996e-05, "loss": 0.8383, "step": 14635 }, { "epoch": 0.78, "learning_rate": 4.205617229142287e-05, "loss": 0.791, "step": 14640 }, { "epoch": 0.78, "learning_rate": 4.205105159085624e-05, "loss": 0.8871, "step": 14645 }, { "epoch": 0.78, "learning_rate": 4.2045929552348914e-05, "loss": 0.8507, "step": 14650 }, { "epoch": 0.78, "learning_rate": 4.2040806176302795e-05, "loss": 0.6648, "step": 14655 }, { "epoch": 0.78, "learning_rate": 4.203568146311989e-05, "loss": 0.8827, "step": 14660 }, { "epoch": 0.78, "learning_rate": 4.203055541320233e-05, "loss": 0.8537, "step": 14665 }, { "epoch": 0.78, "learning_rate": 4.202542802695235e-05, "loss": 0.6253, "step": 14670 }, { "epoch": 0.79, "learning_rate": 4.202029930477226e-05, "loss": 0.8452, "step": 14675 }, { "epoch": 0.79, "learning_rate": 4.2015169247064494e-05, "loss": 0.8675, "step": 14680 }, { "epoch": 0.79, "learning_rate": 4.20100378542316e-05, "loss": 0.7464, "step": 14685 }, { "epoch": 0.79, "learning_rate": 4.2004905126676225e-05, "loss": 0.7281, "step": 14690 }, { "epoch": 0.79, "learning_rate": 4.199977106480111e-05, "loss": 0.7011, "step": 14695 }, { "epoch": 0.79, "learning_rate": 4.199463566900911e-05, "loss": 0.8669, "step": 14700 }, { "epoch": 0.79, "learning_rate": 4.1989498939703186e-05, "loss": 0.5759, "step": 14705 }, { "epoch": 0.79, "learning_rate": 4.19843608772864e-05, "loss": 0.8512, "step": 14710 }, { "epoch": 0.79, "learning_rate": 4.197922148216191e-05, "loss": 0.6725, "step": 14715 }, { "epoch": 0.79, "learning_rate": 4.1974080754732994e-05, "loss": 0.7492, "step": 14720 }, { "epoch": 0.79, "learning_rate": 4.1968938695403026e-05, "loss": 0.7745, "step": 14725 }, { "epoch": 0.79, "learning_rate": 4.1963795304575497e-05, "loss": 0.7565, "step": 14730 }, { "epoch": 0.79, "learning_rate": 4.1958650582653986e-05, "loss": 0.7457, "step": 14735 }, { "epoch": 0.79, "learning_rate": 4.195350453004218e-05, "loss": 0.7437, "step": 14740 }, { "epoch": 0.79, "learning_rate": 4.194835714714386e-05, "loss": 0.7795, "step": 14745 }, { "epoch": 0.79, "learning_rate": 4.194320843436296e-05, "loss": 0.8125, "step": 14750 }, { "epoch": 0.79, "learning_rate": 4.193805839210344e-05, "loss": 0.7636, "step": 14755 }, { "epoch": 0.79, "learning_rate": 4.193290702076945e-05, "loss": 0.7778, "step": 14760 }, { "epoch": 0.79, "learning_rate": 4.1927754320765166e-05, "loss": 0.8949, "step": 14765 }, { "epoch": 0.79, "learning_rate": 4.192260029249492e-05, "loss": 0.8191, "step": 14770 }, { "epoch": 0.79, "learning_rate": 4.191744493636313e-05, "loss": 0.6705, "step": 14775 }, { "epoch": 0.79, "learning_rate": 4.1912288252774326e-05, "loss": 0.7548, "step": 14780 }, { "epoch": 0.79, "learning_rate": 4.190713024213312e-05, "loss": 0.6672, "step": 14785 }, { "epoch": 0.79, "learning_rate": 4.190197090484426e-05, "loss": 0.8458, "step": 14790 }, { "epoch": 0.79, "learning_rate": 4.189681024131258e-05, "loss": 0.8437, "step": 14795 }, { "epoch": 0.79, "learning_rate": 4.1891648251943006e-05, "loss": 0.7822, "step": 14800 }, { "epoch": 0.79, "learning_rate": 4.18864849371406e-05, "loss": 0.8932, "step": 14805 }, { "epoch": 0.79, "learning_rate": 4.18813202973105e-05, "loss": 0.6989, "step": 14810 }, { "epoch": 0.79, "learning_rate": 4.187615433285797e-05, "loss": 0.8448, "step": 14815 }, { "epoch": 0.79, "learning_rate": 4.187098704418836e-05, "loss": 0.7325, "step": 14820 }, { "epoch": 0.79, "learning_rate": 4.1865818431707124e-05, "loss": 0.8346, "step": 14825 }, { "epoch": 0.79, "learning_rate": 4.186064849581983e-05, "loss": 0.7526, "step": 14830 }, { "epoch": 0.79, "learning_rate": 4.185547723693215e-05, "loss": 0.7775, "step": 14835 }, { "epoch": 0.79, "learning_rate": 4.1850304655449855e-05, "loss": 0.8915, "step": 14840 }, { "epoch": 0.79, "learning_rate": 4.1845130751778826e-05, "loss": 0.739, "step": 14845 }, { "epoch": 0.79, "learning_rate": 4.1839955526325026e-05, "loss": 0.7897, "step": 14850 }, { "epoch": 0.79, "learning_rate": 4.1834778979494556e-05, "loss": 0.657, "step": 14855 }, { "epoch": 0.79, "learning_rate": 4.182960111169359e-05, "loss": 0.6768, "step": 14860 }, { "epoch": 0.8, "learning_rate": 4.1824421923328427e-05, "loss": 0.7218, "step": 14865 }, { "epoch": 0.8, "learning_rate": 4.181924141480545e-05, "loss": 0.815, "step": 14870 }, { "epoch": 0.8, "learning_rate": 4.1814059586531174e-05, "loss": 0.7547, "step": 14875 }, { "epoch": 0.8, "learning_rate": 4.180887643891218e-05, "loss": 0.7784, "step": 14880 }, { "epoch": 0.8, "learning_rate": 4.1803691972355195e-05, "loss": 0.6969, "step": 14885 }, { "epoch": 0.8, "learning_rate": 4.1798506187267004e-05, "loss": 0.8073, "step": 14890 }, { "epoch": 0.8, "learning_rate": 4.179331908405454e-05, "loss": 0.8073, "step": 14895 }, { "epoch": 0.8, "learning_rate": 4.1788130663124804e-05, "loss": 0.6123, "step": 14900 }, { "epoch": 0.8, "learning_rate": 4.178294092488492e-05, "loss": 0.8278, "step": 14905 }, { "epoch": 0.8, "learning_rate": 4.177774986974211e-05, "loss": 0.8543, "step": 14910 }, { "epoch": 0.8, "learning_rate": 4.177255749810369e-05, "loss": 0.8314, "step": 14915 }, { "epoch": 0.8, "learning_rate": 4.176736381037712e-05, "loss": 0.8078, "step": 14920 }, { "epoch": 0.8, "learning_rate": 4.176216880696988e-05, "loss": 0.7023, "step": 14925 }, { "epoch": 0.8, "learning_rate": 4.1756972488289656e-05, "loss": 0.7758, "step": 14930 }, { "epoch": 0.8, "learning_rate": 4.175177485474415e-05, "loss": 0.8373, "step": 14935 }, { "epoch": 0.8, "learning_rate": 4.174657590674122e-05, "loss": 0.8394, "step": 14940 }, { "epoch": 0.8, "learning_rate": 4.174137564468881e-05, "loss": 0.766, "step": 14945 }, { "epoch": 0.8, "learning_rate": 4.173617406899496e-05, "loss": 0.8331, "step": 14950 }, { "epoch": 0.8, "learning_rate": 4.173097118006783e-05, "loss": 0.8118, "step": 14955 }, { "epoch": 0.8, "learning_rate": 4.1725766978315675e-05, "loss": 0.7548, "step": 14960 }, { "epoch": 0.8, "learning_rate": 4.172056146414684e-05, "loss": 0.7587, "step": 14965 }, { "epoch": 0.8, "learning_rate": 4.17153546379698e-05, "loss": 0.7882, "step": 14970 }, { "epoch": 0.8, "learning_rate": 4.1710146500193106e-05, "loss": 0.8631, "step": 14975 }, { "epoch": 0.8, "learning_rate": 4.170493705122543e-05, "loss": 0.6468, "step": 14980 }, { "epoch": 0.8, "learning_rate": 4.1699726291475524e-05, "loss": 0.7515, "step": 14985 }, { "epoch": 0.8, "learning_rate": 4.169451422135229e-05, "loss": 0.9514, "step": 14990 }, { "epoch": 0.8, "learning_rate": 4.168930084126468e-05, "loss": 0.7948, "step": 14995 }, { "epoch": 0.8, "learning_rate": 4.168408615162178e-05, "loss": 0.8938, "step": 15000 }, { "epoch": 0.8, "learning_rate": 4.167887015283276e-05, "loss": 0.6644, "step": 15005 }, { "epoch": 0.8, "learning_rate": 4.167365284530691e-05, "loss": 0.784, "step": 15010 }, { "epoch": 0.8, "learning_rate": 4.166843422945362e-05, "loss": 0.7293, "step": 15015 }, { "epoch": 0.8, "learning_rate": 4.166321430568236e-05, "loss": 0.8173, "step": 15020 }, { "epoch": 0.8, "learning_rate": 4.1657993074402745e-05, "loss": 0.6368, "step": 15025 }, { "epoch": 0.8, "learning_rate": 4.1652770536024445e-05, "loss": 0.7605, "step": 15030 }, { "epoch": 0.8, "learning_rate": 4.164754669095727e-05, "loss": 0.8674, "step": 15035 }, { "epoch": 0.8, "learning_rate": 4.164232153961112e-05, "loss": 0.8138, "step": 15040 }, { "epoch": 0.8, "learning_rate": 4.1637095082395985e-05, "loss": 0.6695, "step": 15045 }, { "epoch": 0.81, "learning_rate": 4.163186731972197e-05, "loss": 0.7654, "step": 15050 }, { "epoch": 0.81, "learning_rate": 4.162663825199929e-05, "loss": 0.7337, "step": 15055 }, { "epoch": 0.81, "learning_rate": 4.162140787963824e-05, "loss": 0.7798, "step": 15060 }, { "epoch": 0.81, "learning_rate": 4.161617620304924e-05, "loss": 0.7638, "step": 15065 }, { "epoch": 0.81, "learning_rate": 4.16109432226428e-05, "loss": 0.8025, "step": 15070 }, { "epoch": 0.81, "learning_rate": 4.1605708938829535e-05, "loss": 0.714, "step": 15075 }, { "epoch": 0.81, "learning_rate": 4.1600473352020166e-05, "loss": 0.7884, "step": 15080 }, { "epoch": 0.81, "learning_rate": 4.15952364626255e-05, "loss": 0.7055, "step": 15085 }, { "epoch": 0.81, "learning_rate": 4.1589998271056473e-05, "loss": 0.7577, "step": 15090 }, { "epoch": 0.81, "learning_rate": 4.15847587777241e-05, "loss": 0.6966, "step": 15095 }, { "epoch": 0.81, "learning_rate": 4.1579517983039514e-05, "loss": 0.7704, "step": 15100 }, { "epoch": 0.81, "learning_rate": 4.157427588741394e-05, "loss": 0.8016, "step": 15105 }, { "epoch": 0.81, "learning_rate": 4.1569032491258695e-05, "loss": 0.6717, "step": 15110 }, { "epoch": 0.81, "learning_rate": 4.156378779498524e-05, "loss": 0.804, "step": 15115 }, { "epoch": 0.81, "learning_rate": 4.155854179900508e-05, "loss": 0.7728, "step": 15120 }, { "epoch": 0.81, "learning_rate": 4.1553294503729875e-05, "loss": 0.745, "step": 15125 }, { "epoch": 0.81, "learning_rate": 4.1548045909571354e-05, "loss": 0.8904, "step": 15130 }, { "epoch": 0.81, "learning_rate": 4.1542796016941344e-05, "loss": 0.6619, "step": 15135 }, { "epoch": 0.81, "learning_rate": 4.153754482625181e-05, "loss": 0.6701, "step": 15140 }, { "epoch": 0.81, "learning_rate": 4.1532292337914775e-05, "loss": 0.6635, "step": 15145 }, { "epoch": 0.81, "learning_rate": 4.1527038552342394e-05, "loss": 0.6441, "step": 15150 }, { "epoch": 0.81, "learning_rate": 4.152178346994692e-05, "loss": 0.6902, "step": 15155 }, { "epoch": 0.81, "learning_rate": 4.15165270911407e-05, "loss": 0.8804, "step": 15160 }, { "epoch": 0.81, "learning_rate": 4.151126941633619e-05, "loss": 0.9651, "step": 15165 }, { "epoch": 0.81, "learning_rate": 4.150601044594591e-05, "loss": 0.8558, "step": 15170 }, { "epoch": 0.81, "learning_rate": 4.1500750180382555e-05, "loss": 0.8583, "step": 15175 }, { "epoch": 0.81, "learning_rate": 4.1495488620058865e-05, "loss": 0.9248, "step": 15180 }, { "epoch": 0.81, "learning_rate": 4.149022576538769e-05, "loss": 0.6452, "step": 15185 }, { "epoch": 0.81, "learning_rate": 4.1484961616782016e-05, "loss": 0.746, "step": 15190 }, { "epoch": 0.81, "learning_rate": 4.147969617465487e-05, "loss": 0.7649, "step": 15195 }, { "epoch": 0.81, "learning_rate": 4.1474429439419426e-05, "loss": 0.8337, "step": 15200 }, { "epoch": 0.81, "learning_rate": 4.146916141148896e-05, "loss": 0.8712, "step": 15205 }, { "epoch": 0.81, "learning_rate": 4.146389209127682e-05, "loss": 0.6799, "step": 15210 }, { "epoch": 0.81, "learning_rate": 4.145862147919648e-05, "loss": 0.6875, "step": 15215 }, { "epoch": 0.81, "learning_rate": 4.145334957566151e-05, "loss": 0.8359, "step": 15220 }, { "epoch": 0.81, "learning_rate": 4.144807638108558e-05, "loss": 0.6971, "step": 15225 }, { "epoch": 0.81, "learning_rate": 4.1442801895882454e-05, "loss": 0.8152, "step": 15230 }, { "epoch": 0.82, "learning_rate": 4.143752612046601e-05, "loss": 0.7915, "step": 15235 }, { "epoch": 0.82, "learning_rate": 4.143224905525021e-05, "loss": 0.8422, "step": 15240 }, { "epoch": 0.82, "learning_rate": 4.1426970700649147e-05, "loss": 0.6184, "step": 15245 }, { "epoch": 0.82, "learning_rate": 4.1421691057076975e-05, "loss": 0.7222, "step": 15250 }, { "epoch": 0.82, "learning_rate": 4.141641012494799e-05, "loss": 0.8185, "step": 15255 }, { "epoch": 0.82, "learning_rate": 4.1411127904676556e-05, "loss": 0.7646, "step": 15260 }, { "epoch": 0.82, "learning_rate": 4.1405844396677153e-05, "loss": 0.856, "step": 15265 }, { "epoch": 0.82, "learning_rate": 4.140055960136437e-05, "loss": 0.774, "step": 15270 }, { "epoch": 0.82, "learning_rate": 4.139527351915288e-05, "loss": 0.8261, "step": 15275 }, { "epoch": 0.82, "learning_rate": 4.138998615045747e-05, "loss": 0.6593, "step": 15280 }, { "epoch": 0.82, "learning_rate": 4.1384697495693014e-05, "loss": 0.7459, "step": 15285 }, { "epoch": 0.82, "learning_rate": 4.1379407555274507e-05, "loss": 0.739, "step": 15290 }, { "epoch": 0.82, "learning_rate": 4.137411632961702e-05, "loss": 0.7252, "step": 15295 }, { "epoch": 0.82, "learning_rate": 4.136882381913575e-05, "loss": 0.7608, "step": 15300 }, { "epoch": 0.82, "learning_rate": 4.1363530024245986e-05, "loss": 0.8032, "step": 15305 }, { "epoch": 0.82, "learning_rate": 4.13582349453631e-05, "loss": 0.8075, "step": 15310 }, { "epoch": 0.82, "learning_rate": 4.135293858290258e-05, "loss": 0.8385, "step": 15315 }, { "epoch": 0.82, "learning_rate": 4.134764093728003e-05, "loss": 0.9119, "step": 15320 }, { "epoch": 0.82, "learning_rate": 4.1342342008911126e-05, "loss": 0.8657, "step": 15325 }, { "epoch": 0.82, "learning_rate": 4.1337041798211675e-05, "loss": 0.7458, "step": 15330 }, { "epoch": 0.82, "learning_rate": 4.1331740305597546e-05, "loss": 0.8361, "step": 15335 }, { "epoch": 0.82, "learning_rate": 4.1326437531484734e-05, "loss": 0.9129, "step": 15340 }, { "epoch": 0.82, "learning_rate": 4.132113347628934e-05, "loss": 0.8046, "step": 15345 }, { "epoch": 0.82, "learning_rate": 4.131582814042755e-05, "loss": 0.7866, "step": 15350 }, { "epoch": 0.82, "learning_rate": 4.131052152431566e-05, "loss": 0.6996, "step": 15355 }, { "epoch": 0.82, "learning_rate": 4.1305213628370065e-05, "loss": 0.7152, "step": 15360 }, { "epoch": 0.82, "learning_rate": 4.1299904453007245e-05, "loss": 0.7719, "step": 15365 }, { "epoch": 0.82, "learning_rate": 4.1294593998643805e-05, "loss": 0.7073, "step": 15370 }, { "epoch": 0.82, "learning_rate": 4.1289282265696436e-05, "loss": 0.635, "step": 15375 }, { "epoch": 0.82, "learning_rate": 4.128396925458194e-05, "loss": 0.7967, "step": 15380 }, { "epoch": 0.82, "learning_rate": 4.12786549657172e-05, "loss": 0.8185, "step": 15385 }, { "epoch": 0.82, "learning_rate": 4.127333939951922e-05, "loss": 0.7898, "step": 15390 }, { "epoch": 0.82, "learning_rate": 4.1268022556405086e-05, "loss": 0.8471, "step": 15395 }, { "epoch": 0.82, "learning_rate": 4.1262704436792006e-05, "loss": 0.7575, "step": 15400 }, { "epoch": 0.82, "learning_rate": 4.125738504109726e-05, "loss": 0.7044, "step": 15405 }, { "epoch": 0.82, "learning_rate": 4.1252064369738256e-05, "loss": 0.7422, "step": 15410 }, { "epoch": 0.82, "learning_rate": 4.124674242313249e-05, "loss": 0.8324, "step": 15415 }, { "epoch": 0.82, "learning_rate": 4.124141920169755e-05, "loss": 0.7179, "step": 15420 }, { "epoch": 0.83, "learning_rate": 4.1236094705851136e-05, "loss": 0.986, "step": 15425 }, { "epoch": 0.83, "learning_rate": 4.1230768936011045e-05, "loss": 0.6335, "step": 15430 }, { "epoch": 0.83, "learning_rate": 4.122544189259517e-05, "loss": 0.8705, "step": 15435 }, { "epoch": 0.83, "learning_rate": 4.122011357602151e-05, "loss": 0.7458, "step": 15440 }, { "epoch": 0.83, "learning_rate": 4.1214783986708156e-05, "loss": 0.7942, "step": 15445 }, { "epoch": 0.83, "learning_rate": 4.12094531250733e-05, "loss": 0.7531, "step": 15450 }, { "epoch": 0.83, "learning_rate": 4.120412099153525e-05, "loss": 0.6637, "step": 15455 }, { "epoch": 0.83, "learning_rate": 4.119878758651241e-05, "loss": 0.8635, "step": 15460 }, { "epoch": 0.83, "learning_rate": 4.1193452910423246e-05, "loss": 0.8071, "step": 15465 }, { "epoch": 0.83, "learning_rate": 4.118811696368637e-05, "loss": 0.6934, "step": 15470 }, { "epoch": 0.83, "learning_rate": 4.118277974672047e-05, "loss": 0.7153, "step": 15475 }, { "epoch": 0.83, "learning_rate": 4.117744125994435e-05, "loss": 0.7298, "step": 15480 }, { "epoch": 0.83, "learning_rate": 4.117210150377689e-05, "loss": 0.7669, "step": 15485 }, { "epoch": 0.83, "learning_rate": 4.116676047863709e-05, "loss": 0.6456, "step": 15490 }, { "epoch": 0.83, "learning_rate": 4.116141818494406e-05, "loss": 0.9272, "step": 15495 }, { "epoch": 0.83, "learning_rate": 4.115607462311696e-05, "loss": 0.7576, "step": 15500 }, { "epoch": 0.83, "learning_rate": 4.1150729793575104e-05, "loss": 0.7664, "step": 15505 }, { "epoch": 0.83, "learning_rate": 4.114538369673787e-05, "loss": 0.7221, "step": 15510 }, { "epoch": 0.83, "learning_rate": 4.114003633302476e-05, "loss": 0.6396, "step": 15515 }, { "epoch": 0.83, "learning_rate": 4.1134687702855365e-05, "loss": 0.766, "step": 15520 }, { "epoch": 0.83, "learning_rate": 4.1129337806649365e-05, "loss": 0.8143, "step": 15525 }, { "epoch": 0.83, "learning_rate": 4.112398664482656e-05, "loss": 0.7454, "step": 15530 }, { "epoch": 0.83, "learning_rate": 4.111863421780683e-05, "loss": 0.7416, "step": 15535 }, { "epoch": 0.83, "learning_rate": 4.111328052601017e-05, "loss": 0.6742, "step": 15540 }, { "epoch": 0.83, "learning_rate": 4.110792556985666e-05, "loss": 0.8823, "step": 15545 }, { "epoch": 0.83, "learning_rate": 4.110256934976647e-05, "loss": 0.8482, "step": 15550 }, { "epoch": 0.83, "learning_rate": 4.109721186615992e-05, "loss": 0.692, "step": 15555 }, { "epoch": 0.83, "learning_rate": 4.109185311945738e-05, "loss": 0.7132, "step": 15560 }, { "epoch": 0.83, "learning_rate": 4.1086493110079326e-05, "loss": 0.8627, "step": 15565 }, { "epoch": 0.83, "learning_rate": 4.108113183844634e-05, "loss": 0.8395, "step": 15570 }, { "epoch": 0.83, "learning_rate": 4.107576930497912e-05, "loss": 0.8132, "step": 15575 }, { "epoch": 0.83, "learning_rate": 4.107040551009843e-05, "loss": 0.8542, "step": 15580 }, { "epoch": 0.83, "learning_rate": 4.106504045422515e-05, "loss": 0.7814, "step": 15585 }, { "epoch": 0.83, "learning_rate": 4.1059674137780275e-05, "loss": 0.8744, "step": 15590 }, { "epoch": 0.83, "learning_rate": 4.105430656118486e-05, "loss": 0.7, "step": 15595 }, { "epoch": 0.83, "learning_rate": 4.104893772486011e-05, "loss": 0.6878, "step": 15600 }, { "epoch": 0.83, "learning_rate": 4.1043567629227265e-05, "loss": 0.807, "step": 15605 }, { "epoch": 0.84, "learning_rate": 4.103819627470772e-05, "loss": 0.723, "step": 15610 }, { "epoch": 0.84, "learning_rate": 4.103282366172295e-05, "loss": 0.6832, "step": 15615 }, { "epoch": 0.84, "learning_rate": 4.102744979069452e-05, "loss": 0.8209, "step": 15620 }, { "epoch": 0.84, "learning_rate": 4.10220746620441e-05, "loss": 0.8223, "step": 15625 }, { "epoch": 0.84, "learning_rate": 4.101669827619346e-05, "loss": 0.8338, "step": 15630 }, { "epoch": 0.84, "learning_rate": 4.101132063356447e-05, "loss": 0.7741, "step": 15635 }, { "epoch": 0.84, "learning_rate": 4.100594173457909e-05, "loss": 0.8217, "step": 15640 }, { "epoch": 0.84, "learning_rate": 4.1000561579659396e-05, "loss": 0.7858, "step": 15645 }, { "epoch": 0.84, "learning_rate": 4.0995180169227536e-05, "loss": 0.7079, "step": 15650 }, { "epoch": 0.84, "learning_rate": 4.0989797503705784e-05, "loss": 0.8236, "step": 15655 }, { "epoch": 0.84, "learning_rate": 4.098441358351649e-05, "loss": 0.7493, "step": 15660 }, { "epoch": 0.84, "learning_rate": 4.097902840908213e-05, "loss": 0.6868, "step": 15665 }, { "epoch": 0.84, "learning_rate": 4.097364198082524e-05, "loss": 0.8257, "step": 15670 }, { "epoch": 0.84, "learning_rate": 4.096825429916849e-05, "loss": 0.8279, "step": 15675 }, { "epoch": 0.84, "learning_rate": 4.096286536453463e-05, "loss": 0.7511, "step": 15680 }, { "epoch": 0.84, "learning_rate": 4.095747517734651e-05, "loss": 0.8088, "step": 15685 }, { "epoch": 0.84, "learning_rate": 4.095208373802708e-05, "loss": 0.8327, "step": 15690 }, { "epoch": 0.84, "learning_rate": 4.09466910469994e-05, "loss": 0.708, "step": 15695 }, { "epoch": 0.84, "learning_rate": 4.0941297104686597e-05, "loss": 0.9236, "step": 15700 }, { "epoch": 0.84, "learning_rate": 4.093590191151193e-05, "loss": 0.9189, "step": 15705 }, { "epoch": 0.84, "learning_rate": 4.093050546789874e-05, "loss": 0.6486, "step": 15710 }, { "epoch": 0.84, "learning_rate": 4.092510777427048e-05, "loss": 0.7899, "step": 15715 }, { "epoch": 0.84, "learning_rate": 4.091970883105066e-05, "loss": 0.6896, "step": 15720 }, { "epoch": 0.84, "learning_rate": 4.0914308638662935e-05, "loss": 0.8377, "step": 15725 }, { "epoch": 0.84, "learning_rate": 4.0908907197531054e-05, "loss": 0.7677, "step": 15730 }, { "epoch": 0.84, "learning_rate": 4.0903504508078825e-05, "loss": 0.8673, "step": 15735 }, { "epoch": 0.84, "learning_rate": 4.08981005707302e-05, "loss": 0.7684, "step": 15740 }, { "epoch": 0.84, "learning_rate": 4.0892695385909195e-05, "loss": 0.743, "step": 15745 }, { "epoch": 0.84, "learning_rate": 4.0887288954039945e-05, "loss": 0.6505, "step": 15750 }, { "epoch": 0.84, "learning_rate": 4.088188127554666e-05, "loss": 0.7623, "step": 15755 }, { "epoch": 0.84, "learning_rate": 4.0876472350853695e-05, "loss": 0.8144, "step": 15760 }, { "epoch": 0.84, "learning_rate": 4.087106218038544e-05, "loss": 0.8852, "step": 15765 }, { "epoch": 0.84, "learning_rate": 4.086565076456643e-05, "loss": 0.7938, "step": 15770 }, { "epoch": 0.84, "learning_rate": 4.086023810382127e-05, "loss": 0.8009, "step": 15775 }, { "epoch": 0.84, "learning_rate": 4.0854824198574684e-05, "loss": 0.6205, "step": 15780 }, { "epoch": 0.84, "learning_rate": 4.0849409049251476e-05, "loss": 0.6615, "step": 15785 }, { "epoch": 0.84, "learning_rate": 4.084399265627656e-05, "loss": 0.9059, "step": 15790 }, { "epoch": 0.84, "learning_rate": 4.083857502007494e-05, "loss": 0.7041, "step": 15795 }, { "epoch": 0.85, "learning_rate": 4.0833156141071725e-05, "loss": 0.7699, "step": 15800 }, { "epoch": 0.85, "learning_rate": 4.082773601969212e-05, "loss": 0.7804, "step": 15805 }, { "epoch": 0.85, "learning_rate": 4.08223146563614e-05, "loss": 0.8664, "step": 15810 }, { "epoch": 0.85, "learning_rate": 4.0816892051504994e-05, "loss": 0.797, "step": 15815 }, { "epoch": 0.85, "learning_rate": 4.081146820554839e-05, "loss": 0.9085, "step": 15820 }, { "epoch": 0.85, "learning_rate": 4.080604311891716e-05, "loss": 0.6532, "step": 15825 }, { "epoch": 0.85, "learning_rate": 4.0800616792037e-05, "loss": 0.6988, "step": 15830 }, { "epoch": 0.85, "learning_rate": 4.079518922533371e-05, "loss": 0.9728, "step": 15835 }, { "epoch": 0.85, "learning_rate": 4.078976041923316e-05, "loss": 0.8549, "step": 15840 }, { "epoch": 0.85, "learning_rate": 4.078433037416133e-05, "loss": 0.9004, "step": 15845 }, { "epoch": 0.85, "learning_rate": 4.0778899090544306e-05, "loss": 0.7381, "step": 15850 }, { "epoch": 0.85, "learning_rate": 4.0773466568808264e-05, "loss": 0.7542, "step": 15855 }, { "epoch": 0.85, "learning_rate": 4.0768032809379474e-05, "loss": 0.6511, "step": 15860 }, { "epoch": 0.85, "learning_rate": 4.07625978126843e-05, "loss": 0.8358, "step": 15865 }, { "epoch": 0.85, "learning_rate": 4.075716157914922e-05, "loss": 0.8453, "step": 15870 }, { "epoch": 0.85, "learning_rate": 4.0751724109200786e-05, "loss": 0.6953, "step": 15875 }, { "epoch": 0.85, "learning_rate": 4.074628540326566e-05, "loss": 0.8037, "step": 15880 }, { "epoch": 0.85, "learning_rate": 4.074084546177061e-05, "loss": 0.6102, "step": 15885 }, { "epoch": 0.85, "learning_rate": 4.073540428514247e-05, "loss": 0.7115, "step": 15890 }, { "epoch": 0.85, "learning_rate": 4.0729961873808206e-05, "loss": 0.7593, "step": 15895 }, { "epoch": 0.85, "learning_rate": 4.072451822819487e-05, "loss": 0.7675, "step": 15900 }, { "epoch": 0.85, "learning_rate": 4.071907334872961e-05, "loss": 0.7334, "step": 15905 }, { "epoch": 0.85, "learning_rate": 4.071362723583966e-05, "loss": 0.7047, "step": 15910 }, { "epoch": 0.85, "learning_rate": 4.0708179889952344e-05, "loss": 0.7953, "step": 15915 }, { "epoch": 0.85, "learning_rate": 4.0702731311495124e-05, "loss": 0.7965, "step": 15920 }, { "epoch": 0.85, "learning_rate": 4.069728150089552e-05, "loss": 0.7737, "step": 15925 }, { "epoch": 0.85, "learning_rate": 4.069183045858116e-05, "loss": 0.6713, "step": 15930 }, { "epoch": 0.85, "learning_rate": 4.0686378184979775e-05, "loss": 0.7556, "step": 15935 }, { "epoch": 0.85, "learning_rate": 4.068092468051918e-05, "loss": 0.8424, "step": 15940 }, { "epoch": 0.85, "learning_rate": 4.06754699456273e-05, "loss": 0.6981, "step": 15945 }, { "epoch": 0.85, "learning_rate": 4.067001398073214e-05, "loss": 0.8353, "step": 15950 }, { "epoch": 0.85, "learning_rate": 4.066455678626183e-05, "loss": 0.8526, "step": 15955 }, { "epoch": 0.85, "learning_rate": 4.0659098362644566e-05, "loss": 0.8427, "step": 15960 }, { "epoch": 0.85, "learning_rate": 4.0653638710308654e-05, "loss": 0.8564, "step": 15965 }, { "epoch": 0.85, "learning_rate": 4.0648177829682486e-05, "loss": 0.6733, "step": 15970 }, { "epoch": 0.85, "learning_rate": 4.064271572119458e-05, "loss": 0.7721, "step": 15975 }, { "epoch": 0.85, "learning_rate": 4.063725238527352e-05, "loss": 0.7879, "step": 15980 }, { "epoch": 0.86, "learning_rate": 4.0631787822347985e-05, "loss": 0.8468, "step": 15985 }, { "epoch": 0.86, "learning_rate": 4.0626322032846794e-05, "loss": 0.6828, "step": 15990 }, { "epoch": 0.86, "learning_rate": 4.062085501719879e-05, "loss": 0.6688, "step": 15995 }, { "epoch": 0.86, "learning_rate": 4.0615386775832976e-05, "loss": 0.8081, "step": 16000 }, { "epoch": 0.86, "learning_rate": 4.060991730917842e-05, "loss": 0.7331, "step": 16005 }, { "epoch": 0.86, "learning_rate": 4.060444661766429e-05, "loss": 0.9095, "step": 16010 }, { "epoch": 0.86, "learning_rate": 4.059897470171987e-05, "loss": 0.7211, "step": 16015 }, { "epoch": 0.86, "learning_rate": 4.0593501561774503e-05, "loss": 0.9216, "step": 16020 }, { "epoch": 0.86, "learning_rate": 4.058802719825766e-05, "loss": 0.7649, "step": 16025 }, { "epoch": 0.86, "learning_rate": 4.058255161159889e-05, "loss": 0.9022, "step": 16030 }, { "epoch": 0.86, "learning_rate": 4.057707480222785e-05, "loss": 0.6374, "step": 16035 }, { "epoch": 0.86, "learning_rate": 4.0571596770574284e-05, "loss": 0.7034, "step": 16040 }, { "epoch": 0.86, "learning_rate": 4.056611751706804e-05, "loss": 0.7342, "step": 16045 }, { "epoch": 0.86, "learning_rate": 4.0560637042139056e-05, "loss": 0.9441, "step": 16050 }, { "epoch": 0.86, "learning_rate": 4.055515534621736e-05, "loss": 0.7188, "step": 16055 }, { "epoch": 0.86, "learning_rate": 4.0549672429733085e-05, "loss": 0.7402, "step": 16060 }, { "epoch": 0.86, "learning_rate": 4.054418829311647e-05, "loss": 0.7769, "step": 16065 }, { "epoch": 0.86, "learning_rate": 4.0538702936797824e-05, "loss": 0.6366, "step": 16070 }, { "epoch": 0.86, "learning_rate": 4.053321636120757e-05, "loss": 0.7457, "step": 16075 }, { "epoch": 0.86, "learning_rate": 4.0527728566776225e-05, "loss": 0.7776, "step": 16080 }, { "epoch": 0.86, "learning_rate": 4.052223955393439e-05, "loss": 0.71, "step": 16085 }, { "epoch": 0.86, "learning_rate": 4.051674932311277e-05, "loss": 0.6044, "step": 16090 }, { "epoch": 0.86, "learning_rate": 4.0511257874742175e-05, "loss": 0.7029, "step": 16095 }, { "epoch": 0.86, "learning_rate": 4.05057652092535e-05, "loss": 0.8178, "step": 16100 }, { "epoch": 0.86, "learning_rate": 4.050027132707773e-05, "loss": 0.7541, "step": 16105 }, { "epoch": 0.86, "learning_rate": 4.049477622864595e-05, "loss": 0.7106, "step": 16110 }, { "epoch": 0.86, "learning_rate": 4.0489279914389354e-05, "loss": 0.7993, "step": 16115 }, { "epoch": 0.86, "learning_rate": 4.048378238473921e-05, "loss": 0.6655, "step": 16120 }, { "epoch": 0.86, "learning_rate": 4.0478283640126897e-05, "loss": 0.6923, "step": 16125 }, { "epoch": 0.86, "learning_rate": 4.0472783680983885e-05, "loss": 0.7375, "step": 16130 }, { "epoch": 0.86, "learning_rate": 4.0467282507741725e-05, "loss": 0.7332, "step": 16135 }, { "epoch": 0.86, "learning_rate": 4.0461780120832094e-05, "loss": 0.8744, "step": 16140 }, { "epoch": 0.86, "learning_rate": 4.045627652068673e-05, "loss": 0.8498, "step": 16145 }, { "epoch": 0.86, "learning_rate": 4.04507717077375e-05, "loss": 0.8059, "step": 16150 }, { "epoch": 0.86, "learning_rate": 4.044526568241633e-05, "loss": 0.8051, "step": 16155 }, { "epoch": 0.86, "learning_rate": 4.043975844515528e-05, "loss": 0.7595, "step": 16160 }, { "epoch": 0.86, "learning_rate": 4.043424999638647e-05, "loss": 0.8861, "step": 16165 }, { "epoch": 0.87, "learning_rate": 4.042874033654214e-05, "loss": 0.655, "step": 16170 }, { "epoch": 0.87, "learning_rate": 4.04232294660546e-05, "loss": 0.7882, "step": 16175 }, { "epoch": 0.87, "learning_rate": 4.041771738535628e-05, "loss": 0.6727, "step": 16180 }, { "epoch": 0.87, "learning_rate": 4.04122040948797e-05, "loss": 0.8422, "step": 16185 }, { "epoch": 0.87, "learning_rate": 4.040668959505747e-05, "loss": 0.7551, "step": 16190 }, { "epoch": 0.87, "learning_rate": 4.040117388632228e-05, "loss": 0.8242, "step": 16195 }, { "epoch": 0.87, "learning_rate": 4.0395656969106946e-05, "loss": 0.7644, "step": 16200 }, { "epoch": 0.87, "learning_rate": 4.039013884384435e-05, "loss": 0.6685, "step": 16205 }, { "epoch": 0.87, "learning_rate": 4.0384619510967494e-05, "loss": 0.7384, "step": 16210 }, { "epoch": 0.87, "learning_rate": 4.037909897090946e-05, "loss": 0.7545, "step": 16215 }, { "epoch": 0.87, "learning_rate": 4.037357722410341e-05, "loss": 0.8711, "step": 16220 }, { "epoch": 0.87, "learning_rate": 4.0368054270982636e-05, "loss": 0.7537, "step": 16225 }, { "epoch": 0.87, "learning_rate": 4.03625301119805e-05, "loss": 0.7159, "step": 16230 }, { "epoch": 0.87, "learning_rate": 4.035700474753047e-05, "loss": 0.8496, "step": 16235 }, { "epoch": 0.87, "learning_rate": 4.035147817806609e-05, "loss": 0.7534, "step": 16240 }, { "epoch": 0.87, "learning_rate": 4.034595040402104e-05, "loss": 0.7851, "step": 16245 }, { "epoch": 0.87, "learning_rate": 4.034042142582903e-05, "loss": 0.7378, "step": 16250 }, { "epoch": 0.87, "learning_rate": 4.033489124392392e-05, "loss": 0.8197, "step": 16255 }, { "epoch": 0.87, "learning_rate": 4.032935985873965e-05, "loss": 0.8229, "step": 16260 }, { "epoch": 0.87, "learning_rate": 4.0323827270710235e-05, "loss": 0.8895, "step": 16265 }, { "epoch": 0.87, "learning_rate": 4.0318293480269806e-05, "loss": 0.7482, "step": 16270 }, { "epoch": 0.87, "learning_rate": 4.03127584878526e-05, "loss": 0.9005, "step": 16275 }, { "epoch": 0.87, "learning_rate": 4.03072222938929e-05, "loss": 0.7047, "step": 16280 }, { "epoch": 0.87, "learning_rate": 4.030168489882512e-05, "loss": 0.8361, "step": 16285 }, { "epoch": 0.87, "learning_rate": 4.029614630308378e-05, "loss": 0.8218, "step": 16290 }, { "epoch": 0.87, "learning_rate": 4.029060650710346e-05, "loss": 0.6176, "step": 16295 }, { "epoch": 0.87, "learning_rate": 4.0285065511318854e-05, "loss": 0.7183, "step": 16300 }, { "epoch": 0.87, "learning_rate": 4.027952331616475e-05, "loss": 0.8603, "step": 16305 }, { "epoch": 0.87, "learning_rate": 4.027397992207601e-05, "loss": 0.7843, "step": 16310 }, { "epoch": 0.87, "learning_rate": 4.026843532948762e-05, "loss": 0.6523, "step": 16315 }, { "epoch": 0.87, "learning_rate": 4.026288953883465e-05, "loss": 0.7879, "step": 16320 }, { "epoch": 0.87, "learning_rate": 4.025734255055225e-05, "loss": 0.9354, "step": 16325 }, { "epoch": 0.87, "learning_rate": 4.0251794365075676e-05, "loss": 0.6322, "step": 16330 }, { "epoch": 0.87, "learning_rate": 4.024624498284029e-05, "loss": 0.9286, "step": 16335 }, { "epoch": 0.87, "learning_rate": 4.024069440428151e-05, "loss": 0.9716, "step": 16340 }, { "epoch": 0.87, "learning_rate": 4.023514262983489e-05, "loss": 0.9005, "step": 16345 }, { "epoch": 0.87, "learning_rate": 4.022958965993605e-05, "loss": 0.7018, "step": 16350 }, { "epoch": 0.87, "learning_rate": 4.022403549502072e-05, "loss": 0.8839, "step": 16355 }, { "epoch": 0.88, "learning_rate": 4.021848013552471e-05, "loss": 0.7201, "step": 16360 }, { "epoch": 0.88, "learning_rate": 4.0212923581883946e-05, "loss": 0.7888, "step": 16365 }, { "epoch": 0.88, "learning_rate": 4.020736583453441e-05, "loss": 0.8112, "step": 16370 }, { "epoch": 0.88, "learning_rate": 4.020180689391222e-05, "loss": 0.7379, "step": 16375 }, { "epoch": 0.88, "learning_rate": 4.0196246760453555e-05, "loss": 0.8324, "step": 16380 }, { "epoch": 0.88, "learning_rate": 4.019068543459471e-05, "loss": 0.7707, "step": 16385 }, { "epoch": 0.88, "learning_rate": 4.0185122916772066e-05, "loss": 0.9081, "step": 16390 }, { "epoch": 0.88, "learning_rate": 4.017955920742208e-05, "loss": 0.789, "step": 16395 }, { "epoch": 0.88, "learning_rate": 4.017399430698133e-05, "loss": 0.7814, "step": 16400 }, { "epoch": 0.88, "learning_rate": 4.016842821588648e-05, "loss": 0.8245, "step": 16405 }, { "epoch": 0.88, "learning_rate": 4.0162860934574275e-05, "loss": 0.7647, "step": 16410 }, { "epoch": 0.88, "learning_rate": 4.015729246348157e-05, "loss": 0.8532, "step": 16415 }, { "epoch": 0.88, "learning_rate": 4.01517228030453e-05, "loss": 0.7159, "step": 16420 }, { "epoch": 0.88, "learning_rate": 4.014615195370248e-05, "loss": 0.7297, "step": 16425 }, { "epoch": 0.88, "learning_rate": 4.0140579915890264e-05, "loss": 0.74, "step": 16430 }, { "epoch": 0.88, "learning_rate": 4.0135006690045864e-05, "loss": 0.763, "step": 16435 }, { "epoch": 0.88, "learning_rate": 4.012943227660659e-05, "loss": 0.7939, "step": 16440 }, { "epoch": 0.88, "learning_rate": 4.012385667600985e-05, "loss": 0.8639, "step": 16445 }, { "epoch": 0.88, "learning_rate": 4.011827988869313e-05, "loss": 0.8524, "step": 16450 }, { "epoch": 0.88, "learning_rate": 4.011270191509404e-05, "loss": 0.6646, "step": 16455 }, { "epoch": 0.88, "learning_rate": 4.0107122755650264e-05, "loss": 0.6719, "step": 16460 }, { "epoch": 0.88, "learning_rate": 4.010154241079957e-05, "loss": 0.7728, "step": 16465 }, { "epoch": 0.88, "learning_rate": 4.009596088097985e-05, "loss": 0.6417, "step": 16470 }, { "epoch": 0.88, "learning_rate": 4.009037816662904e-05, "loss": 0.5898, "step": 16475 }, { "epoch": 0.88, "learning_rate": 4.008479426818521e-05, "loss": 0.9297, "step": 16480 }, { "epoch": 0.88, "learning_rate": 4.007920918608652e-05, "loss": 0.7191, "step": 16485 }, { "epoch": 0.88, "learning_rate": 4.007362292077119e-05, "loss": 0.8802, "step": 16490 }, { "epoch": 0.88, "learning_rate": 4.006803547267759e-05, "loss": 0.7134, "step": 16495 }, { "epoch": 0.88, "learning_rate": 4.006244684224412e-05, "loss": 0.8336, "step": 16500 }, { "epoch": 0.88, "learning_rate": 4.005685702990932e-05, "loss": 0.6985, "step": 16505 }, { "epoch": 0.88, "learning_rate": 4.005126603611179e-05, "loss": 0.7269, "step": 16510 }, { "epoch": 0.88, "learning_rate": 4.004567386129025e-05, "loss": 0.7239, "step": 16515 }, { "epoch": 0.88, "learning_rate": 4.0040080505883484e-05, "loss": 0.8284, "step": 16520 }, { "epoch": 0.88, "learning_rate": 4.0034485970330394e-05, "loss": 0.7905, "step": 16525 }, { "epoch": 0.88, "learning_rate": 4.002889025506997e-05, "loss": 0.8063, "step": 16530 }, { "epoch": 0.88, "learning_rate": 4.002329336054128e-05, "loss": 0.7231, "step": 16535 }, { "epoch": 0.88, "learning_rate": 4.001769528718351e-05, "loss": 0.8226, "step": 16540 }, { "epoch": 0.89, "learning_rate": 4.00120960354359e-05, "loss": 0.7639, "step": 16545 }, { "epoch": 0.89, "learning_rate": 4.0006495605737815e-05, "loss": 0.8261, "step": 16550 }, { "epoch": 0.89, "learning_rate": 4.000089399852871e-05, "loss": 0.864, "step": 16555 }, { "epoch": 0.89, "learning_rate": 3.999529121424812e-05, "loss": 0.8983, "step": 16560 }, { "epoch": 0.89, "learning_rate": 3.9989687253335674e-05, "loss": 0.7784, "step": 16565 }, { "epoch": 0.89, "learning_rate": 3.99840821162311e-05, "loss": 0.8629, "step": 16570 }, { "epoch": 0.89, "learning_rate": 3.9978475803374215e-05, "loss": 0.7185, "step": 16575 }, { "epoch": 0.89, "learning_rate": 3.9972868315204924e-05, "loss": 0.7348, "step": 16580 }, { "epoch": 0.89, "learning_rate": 3.996725965216323e-05, "loss": 0.7805, "step": 16585 }, { "epoch": 0.89, "learning_rate": 3.996164981468923e-05, "loss": 0.8034, "step": 16590 }, { "epoch": 0.89, "learning_rate": 3.9956038803223115e-05, "loss": 0.7638, "step": 16595 }, { "epoch": 0.89, "learning_rate": 3.995042661820515e-05, "loss": 0.7141, "step": 16600 }, { "epoch": 0.89, "learning_rate": 3.9944813260075706e-05, "loss": 0.8057, "step": 16605 }, { "epoch": 0.89, "learning_rate": 3.993919872927525e-05, "loss": 0.6932, "step": 16610 }, { "epoch": 0.89, "learning_rate": 3.9933583026244333e-05, "loss": 0.7585, "step": 16615 }, { "epoch": 0.89, "learning_rate": 3.992796615142362e-05, "loss": 0.7144, "step": 16620 }, { "epoch": 0.89, "learning_rate": 3.992234810525381e-05, "loss": 0.8344, "step": 16625 }, { "epoch": 0.89, "learning_rate": 3.991672888817578e-05, "loss": 0.7852, "step": 16630 }, { "epoch": 0.89, "learning_rate": 3.991110850063041e-05, "loss": 0.6898, "step": 16635 }, { "epoch": 0.89, "learning_rate": 3.9905486943058736e-05, "loss": 0.7883, "step": 16640 }, { "epoch": 0.89, "learning_rate": 3.989986421590185e-05, "loss": 0.8072, "step": 16645 }, { "epoch": 0.89, "learning_rate": 3.9894240319600965e-05, "loss": 0.7579, "step": 16650 }, { "epoch": 0.89, "learning_rate": 3.988861525459736e-05, "loss": 0.6968, "step": 16655 }, { "epoch": 0.89, "learning_rate": 3.988298902133242e-05, "loss": 0.8827, "step": 16660 }, { "epoch": 0.89, "learning_rate": 3.987736162024762e-05, "loss": 0.7285, "step": 16665 }, { "epoch": 0.89, "learning_rate": 3.987173305178451e-05, "loss": 0.8232, "step": 16670 }, { "epoch": 0.89, "learning_rate": 3.986610331638476e-05, "loss": 0.7227, "step": 16675 }, { "epoch": 0.89, "learning_rate": 3.98604724144901e-05, "loss": 0.8194, "step": 16680 }, { "epoch": 0.89, "learning_rate": 3.9854840346542395e-05, "loss": 0.7903, "step": 16685 }, { "epoch": 0.89, "learning_rate": 3.9849207112983553e-05, "loss": 0.7442, "step": 16690 }, { "epoch": 0.89, "learning_rate": 3.98435727142556e-05, "loss": 0.8516, "step": 16695 }, { "epoch": 0.89, "learning_rate": 3.983793715080066e-05, "loss": 0.6535, "step": 16700 }, { "epoch": 0.89, "learning_rate": 3.983230042306093e-05, "loss": 0.661, "step": 16705 }, { "epoch": 0.89, "learning_rate": 3.9826662531478696e-05, "loss": 0.6785, "step": 16710 }, { "epoch": 0.89, "learning_rate": 3.982102347649636e-05, "loss": 0.7567, "step": 16715 }, { "epoch": 0.89, "learning_rate": 3.9815383258556385e-05, "loss": 0.6886, "step": 16720 }, { "epoch": 0.89, "learning_rate": 3.980974187810136e-05, "loss": 0.805, "step": 16725 }, { "epoch": 0.89, "learning_rate": 3.980409933557393e-05, "loss": 0.8739, "step": 16730 }, { "epoch": 0.9, "learning_rate": 3.9798455631416854e-05, "loss": 0.737, "step": 16735 }, { "epoch": 0.9, "learning_rate": 3.979281076607297e-05, "loss": 0.8015, "step": 16740 }, { "epoch": 0.9, "learning_rate": 3.978716473998523e-05, "loss": 0.7946, "step": 16745 }, { "epoch": 0.9, "learning_rate": 3.978151755359663e-05, "loss": 0.7053, "step": 16750 }, { "epoch": 0.9, "learning_rate": 3.977586920735031e-05, "loss": 0.8706, "step": 16755 }, { "epoch": 0.9, "learning_rate": 3.977021970168947e-05, "loss": 0.8629, "step": 16760 }, { "epoch": 0.9, "learning_rate": 3.976456903705741e-05, "loss": 0.7652, "step": 16765 }, { "epoch": 0.9, "learning_rate": 3.9758917213897506e-05, "loss": 0.8781, "step": 16770 }, { "epoch": 0.9, "learning_rate": 3.975326423265325e-05, "loss": 0.6837, "step": 16775 }, { "epoch": 0.9, "learning_rate": 3.974761009376822e-05, "loss": 0.7895, "step": 16780 }, { "epoch": 0.9, "learning_rate": 3.974195479768607e-05, "loss": 0.7482, "step": 16785 }, { "epoch": 0.9, "learning_rate": 3.9736298344850554e-05, "loss": 0.7473, "step": 16790 }, { "epoch": 0.9, "learning_rate": 3.973064073570551e-05, "loss": 0.8753, "step": 16795 }, { "epoch": 0.9, "learning_rate": 3.9724981970694883e-05, "loss": 0.8102, "step": 16800 }, { "epoch": 0.9, "learning_rate": 3.971932205026268e-05, "loss": 0.8776, "step": 16805 }, { "epoch": 0.9, "learning_rate": 3.971366097485304e-05, "loss": 0.8675, "step": 16810 }, { "epoch": 0.9, "learning_rate": 3.970799874491014e-05, "loss": 0.8738, "step": 16815 }, { "epoch": 0.9, "learning_rate": 3.9702335360878316e-05, "loss": 0.8247, "step": 16820 }, { "epoch": 0.9, "learning_rate": 3.969667082320193e-05, "loss": 0.7889, "step": 16825 }, { "epoch": 0.9, "learning_rate": 3.9691005132325456e-05, "loss": 0.7191, "step": 16830 }, { "epoch": 0.9, "learning_rate": 3.9685338288693475e-05, "loss": 0.9174, "step": 16835 }, { "epoch": 0.9, "learning_rate": 3.967967029275063e-05, "loss": 0.732, "step": 16840 }, { "epoch": 0.9, "learning_rate": 3.96740011449417e-05, "loss": 0.6945, "step": 16845 }, { "epoch": 0.9, "learning_rate": 3.966833084571149e-05, "loss": 0.7886, "step": 16850 }, { "epoch": 0.9, "learning_rate": 3.966265939550495e-05, "loss": 0.7362, "step": 16855 }, { "epoch": 0.9, "learning_rate": 3.965698679476709e-05, "loss": 0.8332, "step": 16860 }, { "epoch": 0.9, "learning_rate": 3.965131304394304e-05, "loss": 0.7917, "step": 16865 }, { "epoch": 0.9, "learning_rate": 3.964563814347798e-05, "loss": 0.7757, "step": 16870 }, { "epoch": 0.9, "learning_rate": 3.963996209381721e-05, "loss": 0.7305, "step": 16875 }, { "epoch": 0.9, "learning_rate": 3.96342848954061e-05, "loss": 0.6734, "step": 16880 }, { "epoch": 0.9, "learning_rate": 3.962860654869014e-05, "loss": 0.7232, "step": 16885 }, { "epoch": 0.9, "learning_rate": 3.9622927054114885e-05, "loss": 0.8664, "step": 16890 }, { "epoch": 0.9, "learning_rate": 3.961724641212598e-05, "loss": 0.7922, "step": 16895 }, { "epoch": 0.9, "learning_rate": 3.9611564623169166e-05, "loss": 0.7069, "step": 16900 }, { "epoch": 0.9, "learning_rate": 3.960588168769029e-05, "loss": 0.8122, "step": 16905 }, { "epoch": 0.9, "learning_rate": 3.960019760613525e-05, "loss": 0.6678, "step": 16910 }, { "epoch": 0.9, "learning_rate": 3.959451237895008e-05, "loss": 0.7489, "step": 16915 }, { "epoch": 0.91, "learning_rate": 3.9588826006580864e-05, "loss": 0.7602, "step": 16920 }, { "epoch": 0.91, "learning_rate": 3.9583138489473806e-05, "loss": 0.7805, "step": 16925 }, { "epoch": 0.91, "learning_rate": 3.9577449828075177e-05, "loss": 0.6593, "step": 16930 }, { "epoch": 0.91, "learning_rate": 3.957176002283136e-05, "loss": 0.774, "step": 16935 }, { "epoch": 0.91, "learning_rate": 3.95660690741888e-05, "loss": 0.8005, "step": 16940 }, { "epoch": 0.91, "learning_rate": 3.9560376982594054e-05, "loss": 0.6867, "step": 16945 }, { "epoch": 0.91, "learning_rate": 3.955468374849377e-05, "loss": 0.9174, "step": 16950 }, { "epoch": 0.91, "learning_rate": 3.9548989372334664e-05, "loss": 0.7258, "step": 16955 }, { "epoch": 0.91, "learning_rate": 3.954329385456357e-05, "loss": 0.7968, "step": 16960 }, { "epoch": 0.91, "learning_rate": 3.953759719562738e-05, "loss": 0.7154, "step": 16965 }, { "epoch": 0.91, "learning_rate": 3.953189939597311e-05, "loss": 0.9012, "step": 16970 }, { "epoch": 0.91, "learning_rate": 3.9526200456047825e-05, "loss": 0.8471, "step": 16975 }, { "epoch": 0.91, "learning_rate": 3.952050037629873e-05, "loss": 0.7506, "step": 16980 }, { "epoch": 0.91, "learning_rate": 3.951479915717307e-05, "loss": 0.7195, "step": 16985 }, { "epoch": 0.91, "learning_rate": 3.950909679911822e-05, "loss": 0.8235, "step": 16990 }, { "epoch": 0.91, "learning_rate": 3.95033933025816e-05, "loss": 0.7116, "step": 16995 }, { "epoch": 0.91, "learning_rate": 3.9497688668010765e-05, "loss": 0.7941, "step": 17000 }, { "epoch": 0.91, "learning_rate": 3.9491982895853336e-05, "loss": 0.8068, "step": 17005 }, { "epoch": 0.91, "learning_rate": 3.948627598655702e-05, "loss": 0.7387, "step": 17010 }, { "epoch": 0.91, "learning_rate": 3.948056794056963e-05, "loss": 0.6297, "step": 17015 }, { "epoch": 0.91, "learning_rate": 3.947485875833905e-05, "loss": 0.7021, "step": 17020 }, { "epoch": 0.91, "learning_rate": 3.946914844031326e-05, "loss": 0.6078, "step": 17025 }, { "epoch": 0.91, "learning_rate": 3.946343698694034e-05, "loss": 0.8473, "step": 17030 }, { "epoch": 0.91, "learning_rate": 3.945772439866843e-05, "loss": 0.7613, "step": 17035 }, { "epoch": 0.91, "learning_rate": 3.945201067594579e-05, "loss": 0.8674, "step": 17040 }, { "epoch": 0.91, "learning_rate": 3.944629581922077e-05, "loss": 0.8318, "step": 17045 }, { "epoch": 0.91, "learning_rate": 3.944057982894178e-05, "loss": 0.8046, "step": 17050 }, { "epoch": 0.91, "learning_rate": 3.943486270555734e-05, "loss": 0.6783, "step": 17055 }, { "epoch": 0.91, "learning_rate": 3.942914444951604e-05, "loss": 0.745, "step": 17060 }, { "epoch": 0.91, "learning_rate": 3.9423425061266606e-05, "loss": 0.7864, "step": 17065 }, { "epoch": 0.91, "learning_rate": 3.9417704541257785e-05, "loss": 0.693, "step": 17070 }, { "epoch": 0.91, "learning_rate": 3.941198288993847e-05, "loss": 0.7757, "step": 17075 }, { "epoch": 0.91, "learning_rate": 3.940626010775761e-05, "loss": 0.7796, "step": 17080 }, { "epoch": 0.91, "learning_rate": 3.940053619516426e-05, "loss": 0.5972, "step": 17085 }, { "epoch": 0.91, "learning_rate": 3.939481115260755e-05, "loss": 0.7691, "step": 17090 }, { "epoch": 0.91, "learning_rate": 3.9389084980536705e-05, "loss": 0.8229, "step": 17095 }, { "epoch": 0.91, "learning_rate": 3.938335767940105e-05, "loss": 0.6784, "step": 17100 }, { "epoch": 0.92, "learning_rate": 3.9377629249649985e-05, "loss": 0.6522, "step": 17105 }, { "epoch": 0.92, "learning_rate": 3.9371899691732986e-05, "loss": 0.8549, "step": 17110 }, { "epoch": 0.92, "learning_rate": 3.936616900609964e-05, "loss": 0.7314, "step": 17115 }, { "epoch": 0.92, "learning_rate": 3.936043719319963e-05, "loss": 0.6969, "step": 17120 }, { "epoch": 0.92, "learning_rate": 3.9354704253482696e-05, "loss": 0.6828, "step": 17125 }, { "epoch": 0.92, "learning_rate": 3.9348970187398684e-05, "loss": 0.9116, "step": 17130 }, { "epoch": 0.92, "learning_rate": 3.934323499539755e-05, "loss": 0.791, "step": 17135 }, { "epoch": 0.92, "learning_rate": 3.9337498677929286e-05, "loss": 0.742, "step": 17140 }, { "epoch": 0.92, "learning_rate": 3.933176123544401e-05, "loss": 0.8269, "step": 17145 }, { "epoch": 0.92, "learning_rate": 3.932602266839193e-05, "loss": 0.671, "step": 17150 }, { "epoch": 0.92, "learning_rate": 3.9320282977223335e-05, "loss": 0.8394, "step": 17155 }, { "epoch": 0.92, "learning_rate": 3.931454216238858e-05, "loss": 0.7843, "step": 17160 }, { "epoch": 0.92, "learning_rate": 3.930880022433815e-05, "loss": 0.7456, "step": 17165 }, { "epoch": 0.92, "learning_rate": 3.9303057163522586e-05, "loss": 0.7608, "step": 17170 }, { "epoch": 0.92, "learning_rate": 3.929731298039252e-05, "loss": 0.7318, "step": 17175 }, { "epoch": 0.92, "learning_rate": 3.92915676753987e-05, "loss": 0.7529, "step": 17180 }, { "epoch": 0.92, "learning_rate": 3.9285821248991915e-05, "loss": 0.6915, "step": 17185 }, { "epoch": 0.92, "learning_rate": 3.92800737016231e-05, "loss": 0.6232, "step": 17190 }, { "epoch": 0.92, "learning_rate": 3.927432503374322e-05, "loss": 0.7671, "step": 17195 }, { "epoch": 0.92, "learning_rate": 3.926857524580336e-05, "loss": 0.7864, "step": 17200 }, { "epoch": 0.92, "learning_rate": 3.92628243382547e-05, "loss": 0.7925, "step": 17205 }, { "epoch": 0.92, "learning_rate": 3.925707231154848e-05, "loss": 0.8217, "step": 17210 }, { "epoch": 0.92, "learning_rate": 3.9251319166136046e-05, "loss": 0.705, "step": 17215 }, { "epoch": 0.92, "learning_rate": 3.924556490246884e-05, "loss": 0.776, "step": 17220 }, { "epoch": 0.92, "learning_rate": 3.923980952099836e-05, "loss": 0.798, "step": 17225 }, { "epoch": 0.92, "learning_rate": 3.923405302217623e-05, "loss": 0.6944, "step": 17230 }, { "epoch": 0.92, "learning_rate": 3.922829540645414e-05, "loss": 0.6899, "step": 17235 }, { "epoch": 0.92, "learning_rate": 3.922253667428387e-05, "loss": 0.7824, "step": 17240 }, { "epoch": 0.92, "learning_rate": 3.9216776826117286e-05, "loss": 0.875, "step": 17245 }, { "epoch": 0.92, "learning_rate": 3.921101586240634e-05, "loss": 0.9044, "step": 17250 }, { "epoch": 0.92, "learning_rate": 3.920525378360309e-05, "loss": 0.7314, "step": 17255 }, { "epoch": 0.92, "learning_rate": 3.919949059015966e-05, "loss": 0.7633, "step": 17260 }, { "epoch": 0.92, "learning_rate": 3.919372628252827e-05, "loss": 0.8406, "step": 17265 }, { "epoch": 0.92, "learning_rate": 3.918796086116122e-05, "loss": 0.8061, "step": 17270 }, { "epoch": 0.92, "learning_rate": 3.9182194326510916e-05, "loss": 0.7262, "step": 17275 }, { "epoch": 0.92, "learning_rate": 3.9176426679029835e-05, "loss": 0.7744, "step": 17280 }, { "epoch": 0.92, "learning_rate": 3.917065791917053e-05, "loss": 0.8709, "step": 17285 }, { "epoch": 0.92, "learning_rate": 3.916488804738568e-05, "loss": 0.9183, "step": 17290 }, { "epoch": 0.93, "learning_rate": 3.9159117064128027e-05, "loss": 0.8939, "step": 17295 }, { "epoch": 0.93, "learning_rate": 3.9153344969850384e-05, "loss": 0.6784, "step": 17300 }, { "epoch": 0.93, "learning_rate": 3.914757176500567e-05, "loss": 0.9736, "step": 17305 }, { "epoch": 0.93, "learning_rate": 3.914179745004691e-05, "loss": 0.8132, "step": 17310 }, { "epoch": 0.93, "learning_rate": 3.913602202542718e-05, "loss": 0.6945, "step": 17315 }, { "epoch": 0.93, "learning_rate": 3.913024549159966e-05, "loss": 0.8214, "step": 17320 }, { "epoch": 0.93, "learning_rate": 3.912446784901762e-05, "loss": 0.6764, "step": 17325 }, { "epoch": 0.93, "learning_rate": 3.911868909813441e-05, "loss": 0.6392, "step": 17330 }, { "epoch": 0.93, "learning_rate": 3.9112909239403475e-05, "loss": 0.8495, "step": 17335 }, { "epoch": 0.93, "learning_rate": 3.910712827327833e-05, "loss": 0.5846, "step": 17340 }, { "epoch": 0.93, "learning_rate": 3.91013462002126e-05, "loss": 0.6663, "step": 17345 }, { "epoch": 0.93, "learning_rate": 3.909556302065998e-05, "loss": 0.8245, "step": 17350 }, { "epoch": 0.93, "learning_rate": 3.908977873507425e-05, "loss": 0.6627, "step": 17355 }, { "epoch": 0.93, "learning_rate": 3.908399334390931e-05, "loss": 0.7817, "step": 17360 }, { "epoch": 0.93, "learning_rate": 3.907820684761909e-05, "loss": 0.7928, "step": 17365 }, { "epoch": 0.93, "learning_rate": 3.9072419246657655e-05, "loss": 0.932, "step": 17370 }, { "epoch": 0.93, "learning_rate": 3.906663054147913e-05, "loss": 0.7943, "step": 17375 }, { "epoch": 0.93, "learning_rate": 3.906084073253775e-05, "loss": 0.8446, "step": 17380 }, { "epoch": 0.93, "learning_rate": 3.905504982028781e-05, "loss": 0.7757, "step": 17385 }, { "epoch": 0.93, "learning_rate": 3.9049257805183714e-05, "loss": 0.7323, "step": 17390 }, { "epoch": 0.93, "learning_rate": 3.904346468767993e-05, "loss": 0.6872, "step": 17395 }, { "epoch": 0.93, "learning_rate": 3.9037670468231037e-05, "loss": 0.5631, "step": 17400 }, { "epoch": 0.93, "learning_rate": 3.9031875147291684e-05, "loss": 0.7096, "step": 17405 }, { "epoch": 0.93, "learning_rate": 3.9026078725316605e-05, "loss": 0.8894, "step": 17410 }, { "epoch": 0.93, "learning_rate": 3.902028120276063e-05, "loss": 0.72, "step": 17415 }, { "epoch": 0.93, "learning_rate": 3.9014482580078684e-05, "loss": 0.8377, "step": 17420 }, { "epoch": 0.93, "learning_rate": 3.900868285772575e-05, "loss": 0.7026, "step": 17425 }, { "epoch": 0.93, "learning_rate": 3.900288203615692e-05, "loss": 0.7654, "step": 17430 }, { "epoch": 0.93, "learning_rate": 3.899708011582737e-05, "loss": 0.8032, "step": 17435 }, { "epoch": 0.93, "learning_rate": 3.899127709719234e-05, "loss": 0.7333, "step": 17440 }, { "epoch": 0.93, "learning_rate": 3.898547298070719e-05, "loss": 0.8479, "step": 17445 }, { "epoch": 0.93, "learning_rate": 3.8979667766827355e-05, "loss": 0.6394, "step": 17450 }, { "epoch": 0.93, "learning_rate": 3.897386145600834e-05, "loss": 0.8133, "step": 17455 }, { "epoch": 0.93, "learning_rate": 3.896805404870575e-05, "loss": 0.8142, "step": 17460 }, { "epoch": 0.93, "learning_rate": 3.896224554537527e-05, "loss": 0.6381, "step": 17465 }, { "epoch": 0.93, "learning_rate": 3.895643594647268e-05, "loss": 0.5959, "step": 17470 }, { "epoch": 0.93, "learning_rate": 3.895062525245384e-05, "loss": 0.742, "step": 17475 }, { "epoch": 0.94, "learning_rate": 3.8944813463774705e-05, "loss": 0.6821, "step": 17480 }, { "epoch": 0.94, "learning_rate": 3.893900058089128e-05, "loss": 0.808, "step": 17485 }, { "epoch": 0.94, "learning_rate": 3.8933186604259715e-05, "loss": 0.6793, "step": 17490 }, { "epoch": 0.94, "learning_rate": 3.8927371534336196e-05, "loss": 0.7619, "step": 17495 }, { "epoch": 0.94, "learning_rate": 3.8921555371577e-05, "loss": 0.6483, "step": 17500 }, { "epoch": 0.94, "learning_rate": 3.891573811643854e-05, "loss": 0.899, "step": 17505 }, { "epoch": 0.94, "learning_rate": 3.890991976937725e-05, "loss": 0.7156, "step": 17510 }, { "epoch": 0.94, "learning_rate": 3.890410033084968e-05, "loss": 0.7189, "step": 17515 }, { "epoch": 0.94, "learning_rate": 3.889827980131246e-05, "loss": 0.9347, "step": 17520 }, { "epoch": 0.94, "learning_rate": 3.889245818122232e-05, "loss": 0.6657, "step": 17525 }, { "epoch": 0.94, "learning_rate": 3.8886635471036056e-05, "loss": 0.7726, "step": 17530 }, { "epoch": 0.94, "learning_rate": 3.8880811671210556e-05, "loss": 0.7855, "step": 17535 }, { "epoch": 0.94, "learning_rate": 3.887498678220279e-05, "loss": 0.7795, "step": 17540 }, { "epoch": 0.94, "learning_rate": 3.8869160804469834e-05, "loss": 0.7495, "step": 17545 }, { "epoch": 0.94, "learning_rate": 3.8863333738468824e-05, "loss": 0.8478, "step": 17550 }, { "epoch": 0.94, "learning_rate": 3.885750558465698e-05, "loss": 0.8729, "step": 17555 }, { "epoch": 0.94, "learning_rate": 3.885167634349165e-05, "loss": 0.9165, "step": 17560 }, { "epoch": 0.94, "learning_rate": 3.8845846015430195e-05, "loss": 0.7312, "step": 17565 }, { "epoch": 0.94, "learning_rate": 3.884001460093013e-05, "loss": 0.7507, "step": 17570 }, { "epoch": 0.94, "learning_rate": 3.883418210044901e-05, "loss": 0.7741, "step": 17575 }, { "epoch": 0.94, "learning_rate": 3.882834851444451e-05, "loss": 0.7969, "step": 17580 }, { "epoch": 0.94, "learning_rate": 3.8822513843374367e-05, "loss": 0.6521, "step": 17585 }, { "epoch": 0.94, "learning_rate": 3.881667808769641e-05, "loss": 0.7637, "step": 17590 }, { "epoch": 0.94, "learning_rate": 3.8810841247868535e-05, "loss": 0.7294, "step": 17595 }, { "epoch": 0.94, "learning_rate": 3.8805003324348757e-05, "loss": 0.7753, "step": 17600 }, { "epoch": 0.94, "learning_rate": 3.879916431759516e-05, "loss": 0.8253, "step": 17605 }, { "epoch": 0.94, "learning_rate": 3.87933242280659e-05, "loss": 0.7932, "step": 17610 }, { "epoch": 0.94, "learning_rate": 3.878748305621923e-05, "loss": 0.9105, "step": 17615 }, { "epoch": 0.94, "learning_rate": 3.878164080251351e-05, "loss": 0.7556, "step": 17620 }, { "epoch": 0.94, "learning_rate": 3.877579746740714e-05, "loss": 0.7761, "step": 17625 }, { "epoch": 0.94, "learning_rate": 3.876995305135863e-05, "loss": 0.8138, "step": 17630 }, { "epoch": 0.94, "learning_rate": 3.876410755482658e-05, "loss": 0.7631, "step": 17635 }, { "epoch": 0.94, "learning_rate": 3.875826097826966e-05, "loss": 0.8511, "step": 17640 }, { "epoch": 0.94, "learning_rate": 3.875241332214664e-05, "loss": 0.7067, "step": 17645 }, { "epoch": 0.94, "learning_rate": 3.874656458691637e-05, "loss": 0.8036, "step": 17650 }, { "epoch": 0.94, "learning_rate": 3.874071477303775e-05, "loss": 0.7485, "step": 17655 }, { "epoch": 0.94, "learning_rate": 3.8734863880969844e-05, "loss": 0.941, "step": 17660 }, { "epoch": 0.95, "learning_rate": 3.872901191117172e-05, "loss": 0.7004, "step": 17665 }, { "epoch": 0.95, "learning_rate": 3.8723158864102566e-05, "loss": 0.7929, "step": 17670 }, { "epoch": 0.95, "learning_rate": 3.871730474022166e-05, "loss": 0.8741, "step": 17675 }, { "epoch": 0.95, "learning_rate": 3.871144953998835e-05, "loss": 0.6594, "step": 17680 }, { "epoch": 0.95, "learning_rate": 3.8705593263862085e-05, "loss": 0.7829, "step": 17685 }, { "epoch": 0.95, "learning_rate": 3.869973591230237e-05, "loss": 0.7887, "step": 17690 }, { "epoch": 0.95, "learning_rate": 3.869387748576884e-05, "loss": 0.699, "step": 17695 }, { "epoch": 0.95, "learning_rate": 3.868801798472115e-05, "loss": 0.6287, "step": 17700 }, { "epoch": 0.95, "learning_rate": 3.8682157409619105e-05, "loss": 0.8502, "step": 17705 }, { "epoch": 0.95, "learning_rate": 3.8676295760922555e-05, "loss": 0.7976, "step": 17710 }, { "epoch": 0.95, "learning_rate": 3.867043303909145e-05, "loss": 0.8013, "step": 17715 }, { "epoch": 0.95, "learning_rate": 3.86645692445858e-05, "loss": 0.685, "step": 17720 }, { "epoch": 0.95, "learning_rate": 3.865870437786574e-05, "loss": 0.8897, "step": 17725 }, { "epoch": 0.95, "learning_rate": 3.8652838439391464e-05, "loss": 0.7566, "step": 17730 }, { "epoch": 0.95, "learning_rate": 3.864697142962325e-05, "loss": 0.6678, "step": 17735 }, { "epoch": 0.95, "learning_rate": 3.864110334902145e-05, "loss": 0.7299, "step": 17740 }, { "epoch": 0.95, "learning_rate": 3.8635234198046534e-05, "loss": 0.7878, "step": 17745 }, { "epoch": 0.95, "learning_rate": 3.862936397715902e-05, "loss": 0.7374, "step": 17750 }, { "epoch": 0.95, "learning_rate": 3.862349268681954e-05, "loss": 0.7431, "step": 17755 }, { "epoch": 0.95, "learning_rate": 3.861762032748878e-05, "loss": 0.7168, "step": 17760 }, { "epoch": 0.95, "learning_rate": 3.8611746899627534e-05, "loss": 0.7872, "step": 17765 }, { "epoch": 0.95, "learning_rate": 3.860587240369666e-05, "loss": 0.7851, "step": 17770 }, { "epoch": 0.95, "learning_rate": 3.8599996840157126e-05, "loss": 0.9775, "step": 17775 }, { "epoch": 0.95, "learning_rate": 3.859412020946995e-05, "loss": 0.7903, "step": 17780 }, { "epoch": 0.95, "learning_rate": 3.858824251209628e-05, "loss": 0.7083, "step": 17785 }, { "epoch": 0.95, "learning_rate": 3.85823637484973e-05, "loss": 0.8283, "step": 17790 }, { "epoch": 0.95, "learning_rate": 3.8576483919134295e-05, "loss": 0.7515, "step": 17795 }, { "epoch": 0.95, "learning_rate": 3.857060302446864e-05, "loss": 0.6571, "step": 17800 }, { "epoch": 0.95, "learning_rate": 3.8564721064961794e-05, "loss": 0.7269, "step": 17805 }, { "epoch": 0.95, "learning_rate": 3.8558838041075296e-05, "loss": 0.6345, "step": 17810 }, { "epoch": 0.95, "learning_rate": 3.855295395327077e-05, "loss": 0.7981, "step": 17815 }, { "epoch": 0.95, "learning_rate": 3.85470688020099e-05, "loss": 0.8889, "step": 17820 }, { "epoch": 0.95, "learning_rate": 3.8541182587754495e-05, "loss": 0.8363, "step": 17825 }, { "epoch": 0.95, "learning_rate": 3.853529531096643e-05, "loss": 0.7905, "step": 17830 }, { "epoch": 0.95, "learning_rate": 3.852940697210765e-05, "loss": 0.7161, "step": 17835 }, { "epoch": 0.95, "learning_rate": 3.85235175716402e-05, "loss": 0.6421, "step": 17840 }, { "epoch": 0.95, "learning_rate": 3.85176271100262e-05, "loss": 0.6763, "step": 17845 }, { "epoch": 0.95, "learning_rate": 3.8511735587727846e-05, "loss": 0.7565, "step": 17850 }, { "epoch": 0.96, "learning_rate": 3.850584300520744e-05, "loss": 0.8027, "step": 17855 }, { "epoch": 0.96, "learning_rate": 3.8499949362927354e-05, "loss": 0.8461, "step": 17860 }, { "epoch": 0.96, "learning_rate": 3.849405466135003e-05, "loss": 0.8408, "step": 17865 }, { "epoch": 0.96, "learning_rate": 3.8488158900938016e-05, "loss": 0.7571, "step": 17870 }, { "epoch": 0.96, "learning_rate": 3.8482262082153934e-05, "loss": 0.8898, "step": 17875 }, { "epoch": 0.96, "learning_rate": 3.847636420546049e-05, "loss": 0.8263, "step": 17880 }, { "epoch": 0.96, "learning_rate": 3.8470465271320457e-05, "loss": 0.7813, "step": 17885 }, { "epoch": 0.96, "learning_rate": 3.846456528019672e-05, "loss": 0.7865, "step": 17890 }, { "epoch": 0.96, "learning_rate": 3.845866423255223e-05, "loss": 0.7483, "step": 17895 }, { "epoch": 0.96, "learning_rate": 3.8452762128850017e-05, "loss": 0.6165, "step": 17900 }, { "epoch": 0.96, "learning_rate": 3.84468589695532e-05, "loss": 0.5934, "step": 17905 }, { "epoch": 0.96, "learning_rate": 3.8440954755124994e-05, "loss": 0.8084, "step": 17910 }, { "epoch": 0.96, "learning_rate": 3.8435049486028665e-05, "loss": 0.7367, "step": 17915 }, { "epoch": 0.96, "learning_rate": 3.842914316272759e-05, "loss": 0.7221, "step": 17920 }, { "epoch": 0.96, "learning_rate": 3.842323578568522e-05, "loss": 0.5363, "step": 17925 }, { "epoch": 0.96, "learning_rate": 3.8417327355365086e-05, "loss": 0.7407, "step": 17930 }, { "epoch": 0.96, "learning_rate": 3.8411417872230805e-05, "loss": 0.7741, "step": 17935 }, { "epoch": 0.96, "learning_rate": 3.840550733674607e-05, "loss": 0.8834, "step": 17940 }, { "epoch": 0.96, "learning_rate": 3.839959574937466e-05, "loss": 0.673, "step": 17945 }, { "epoch": 0.96, "learning_rate": 3.839368311058045e-05, "loss": 0.811, "step": 17950 }, { "epoch": 0.96, "learning_rate": 3.8387769420827375e-05, "loss": 0.7714, "step": 17955 }, { "epoch": 0.96, "learning_rate": 3.838185468057947e-05, "loss": 0.7051, "step": 17960 }, { "epoch": 0.96, "learning_rate": 3.837593889030083e-05, "loss": 0.7434, "step": 17965 }, { "epoch": 0.96, "learning_rate": 3.837002205045568e-05, "loss": 0.7249, "step": 17970 }, { "epoch": 0.96, "learning_rate": 3.8364104161508256e-05, "loss": 0.7987, "step": 17975 }, { "epoch": 0.96, "learning_rate": 3.835818522392294e-05, "loss": 0.7849, "step": 17980 }, { "epoch": 0.96, "learning_rate": 3.835226523816417e-05, "loss": 0.7924, "step": 17985 }, { "epoch": 0.96, "learning_rate": 3.8346344204696465e-05, "loss": 0.7745, "step": 17990 }, { "epoch": 0.96, "learning_rate": 3.8340422123984424e-05, "loss": 0.706, "step": 17995 }, { "epoch": 0.96, "learning_rate": 3.833449899649274e-05, "loss": 0.7746, "step": 18000 }, { "epoch": 0.96, "learning_rate": 3.832857482268618e-05, "loss": 0.7411, "step": 18005 }, { "epoch": 0.96, "learning_rate": 3.8322649603029595e-05, "loss": 0.602, "step": 18010 }, { "epoch": 0.96, "learning_rate": 3.8316723337987906e-05, "loss": 0.8662, "step": 18015 }, { "epoch": 0.96, "learning_rate": 3.831079602802616e-05, "loss": 0.7628, "step": 18020 }, { "epoch": 0.96, "learning_rate": 3.830486767360941e-05, "loss": 0.6366, "step": 18025 }, { "epoch": 0.96, "learning_rate": 3.829893827520287e-05, "loss": 0.7286, "step": 18030 }, { "epoch": 0.96, "learning_rate": 3.8293007833271786e-05, "loss": 0.8289, "step": 18035 }, { "epoch": 0.97, "learning_rate": 3.8287076348281505e-05, "loss": 0.7653, "step": 18040 }, { "epoch": 0.97, "learning_rate": 3.8281143820697446e-05, "loss": 0.8328, "step": 18045 }, { "epoch": 0.97, "learning_rate": 3.827521025098512e-05, "loss": 0.7595, "step": 18050 }, { "epoch": 0.97, "learning_rate": 3.826927563961012e-05, "loss": 0.6525, "step": 18055 }, { "epoch": 0.97, "learning_rate": 3.826333998703809e-05, "loss": 0.7187, "step": 18060 }, { "epoch": 0.97, "learning_rate": 3.8257403293734816e-05, "loss": 0.7774, "step": 18065 }, { "epoch": 0.97, "learning_rate": 3.825146556016611e-05, "loss": 0.7758, "step": 18070 }, { "epoch": 0.97, "learning_rate": 3.82455267867979e-05, "loss": 0.7693, "step": 18075 }, { "epoch": 0.97, "learning_rate": 3.8239586974096164e-05, "loss": 0.9131, "step": 18080 }, { "epoch": 0.97, "learning_rate": 3.823364612252699e-05, "loss": 0.7526, "step": 18085 }, { "epoch": 0.97, "learning_rate": 3.8227704232556546e-05, "loss": 0.7251, "step": 18090 }, { "epoch": 0.97, "learning_rate": 3.822176130465105e-05, "loss": 0.8181, "step": 18095 }, { "epoch": 0.97, "learning_rate": 3.821581733927685e-05, "loss": 0.7405, "step": 18100 }, { "epoch": 0.97, "learning_rate": 3.8209872336900346e-05, "loss": 0.7548, "step": 18105 }, { "epoch": 0.97, "learning_rate": 3.8203926297988e-05, "loss": 0.7436, "step": 18110 }, { "epoch": 0.97, "learning_rate": 3.81979792230064e-05, "loss": 0.8826, "step": 18115 }, { "epoch": 0.97, "learning_rate": 3.8192031112422185e-05, "loss": 0.7785, "step": 18120 }, { "epoch": 0.97, "learning_rate": 3.8186081966702085e-05, "loss": 0.8562, "step": 18125 }, { "epoch": 0.97, "learning_rate": 3.818013178631291e-05, "loss": 0.7341, "step": 18130 }, { "epoch": 0.97, "learning_rate": 3.817418057172156e-05, "loss": 0.8296, "step": 18135 }, { "epoch": 0.97, "learning_rate": 3.816822832339499e-05, "loss": 0.8363, "step": 18140 }, { "epoch": 0.97, "learning_rate": 3.816227504180028e-05, "loss": 0.7421, "step": 18145 }, { "epoch": 0.97, "learning_rate": 3.815632072740454e-05, "loss": 0.99, "step": 18150 }, { "epoch": 0.97, "learning_rate": 3.815036538067499e-05, "loss": 0.7078, "step": 18155 }, { "epoch": 0.97, "learning_rate": 3.814440900207894e-05, "loss": 0.7589, "step": 18160 }, { "epoch": 0.97, "learning_rate": 3.8138451592083755e-05, "loss": 0.8, "step": 18165 }, { "epoch": 0.97, "learning_rate": 3.8132493151156894e-05, "loss": 0.7336, "step": 18170 }, { "epoch": 0.97, "learning_rate": 3.8126533679765894e-05, "loss": 0.6928, "step": 18175 }, { "epoch": 0.97, "learning_rate": 3.81205731783784e-05, "loss": 0.8581, "step": 18180 }, { "epoch": 0.97, "learning_rate": 3.8114611647462084e-05, "loss": 0.6296, "step": 18185 }, { "epoch": 0.97, "learning_rate": 3.810864908748474e-05, "loss": 0.611, "step": 18190 }, { "epoch": 0.97, "learning_rate": 3.8102685498914226e-05, "loss": 0.8231, "step": 18195 }, { "epoch": 0.97, "learning_rate": 3.80967208822185e-05, "loss": 0.8017, "step": 18200 }, { "epoch": 0.97, "learning_rate": 3.8090755237865556e-05, "loss": 0.7067, "step": 18205 }, { "epoch": 0.97, "learning_rate": 3.8084788566323524e-05, "loss": 0.7478, "step": 18210 }, { "epoch": 0.97, "learning_rate": 3.807882086806059e-05, "loss": 0.784, "step": 18215 }, { "epoch": 0.97, "learning_rate": 3.8072852143545015e-05, "loss": 0.8031, "step": 18220 }, { "epoch": 0.97, "learning_rate": 3.8066882393245126e-05, "loss": 0.8042, "step": 18225 }, { "epoch": 0.98, "learning_rate": 3.806091161762938e-05, "loss": 0.7842, "step": 18230 }, { "epoch": 0.98, "learning_rate": 3.8054939817166275e-05, "loss": 0.6238, "step": 18235 }, { "epoch": 0.98, "learning_rate": 3.804896699232439e-05, "loss": 0.7415, "step": 18240 }, { "epoch": 0.98, "learning_rate": 3.804299314357239e-05, "loss": 0.7814, "step": 18245 }, { "epoch": 0.98, "learning_rate": 3.803701827137905e-05, "loss": 0.6005, "step": 18250 }, { "epoch": 0.98, "learning_rate": 3.803104237621318e-05, "loss": 0.6153, "step": 18255 }, { "epoch": 0.98, "learning_rate": 3.802506545854367e-05, "loss": 0.6509, "step": 18260 }, { "epoch": 0.98, "learning_rate": 3.801908751883955e-05, "loss": 0.7546, "step": 18265 }, { "epoch": 0.98, "learning_rate": 3.801310855756986e-05, "loss": 0.6083, "step": 18270 }, { "epoch": 0.98, "learning_rate": 3.8007128575203765e-05, "loss": 0.7606, "step": 18275 }, { "epoch": 0.98, "learning_rate": 3.8001147572210485e-05, "loss": 0.7806, "step": 18280 }, { "epoch": 0.98, "learning_rate": 3.799516554905932e-05, "loss": 0.8587, "step": 18285 }, { "epoch": 0.98, "learning_rate": 3.798918250621969e-05, "loss": 0.6479, "step": 18290 }, { "epoch": 0.98, "learning_rate": 3.7983198444161045e-05, "loss": 0.6683, "step": 18295 }, { "epoch": 0.98, "learning_rate": 3.797721336335294e-05, "loss": 0.7001, "step": 18300 }, { "epoch": 0.98, "learning_rate": 3.7971227264264997e-05, "loss": 0.8493, "step": 18305 }, { "epoch": 0.98, "learning_rate": 3.796524014736694e-05, "loss": 0.7354, "step": 18310 }, { "epoch": 0.98, "learning_rate": 3.7959252013128546e-05, "loss": 0.6859, "step": 18315 }, { "epoch": 0.98, "learning_rate": 3.795326286201968e-05, "loss": 0.7001, "step": 18320 }, { "epoch": 0.98, "learning_rate": 3.794727269451032e-05, "loss": 0.7015, "step": 18325 }, { "epoch": 0.98, "learning_rate": 3.794128151107046e-05, "loss": 0.7658, "step": 18330 }, { "epoch": 0.98, "learning_rate": 3.793528931217023e-05, "loss": 0.8859, "step": 18335 }, { "epoch": 0.98, "learning_rate": 3.792929609827981e-05, "loss": 0.7092, "step": 18340 }, { "epoch": 0.98, "learning_rate": 3.792330186986947e-05, "loss": 0.7721, "step": 18345 }, { "epoch": 0.98, "learning_rate": 3.7917306627409556e-05, "loss": 0.7075, "step": 18350 }, { "epoch": 0.98, "learning_rate": 3.79113103713705e-05, "loss": 0.8593, "step": 18355 }, { "epoch": 0.98, "learning_rate": 3.7905313102222816e-05, "loss": 0.7468, "step": 18360 }, { "epoch": 0.98, "learning_rate": 3.789931482043707e-05, "loss": 0.6434, "step": 18365 }, { "epoch": 0.98, "learning_rate": 3.789331552648393e-05, "loss": 0.7863, "step": 18370 }, { "epoch": 0.98, "learning_rate": 3.788731522083416e-05, "loss": 0.7467, "step": 18375 }, { "epoch": 0.98, "learning_rate": 3.788131390395858e-05, "loss": 0.6507, "step": 18380 }, { "epoch": 0.98, "learning_rate": 3.787531157632808e-05, "loss": 1.0363, "step": 18385 }, { "epoch": 0.98, "learning_rate": 3.786930823841365e-05, "loss": 0.9571, "step": 18390 }, { "epoch": 0.98, "learning_rate": 3.786330389068635e-05, "loss": 0.7973, "step": 18395 }, { "epoch": 0.98, "learning_rate": 3.785729853361733e-05, "loss": 0.9392, "step": 18400 }, { "epoch": 0.98, "learning_rate": 3.785249352155273e-05, "loss": 0.7905, "step": 18405 }, { "epoch": 0.98, "learning_rate": 3.7846486348856136e-05, "loss": 0.665, "step": 18410 }, { "epoch": 0.99, "learning_rate": 3.784047816813743e-05, "loss": 0.7184, "step": 18415 }, { "epoch": 0.99, "learning_rate": 3.783446897986806e-05, "loss": 0.6136, "step": 18420 }, { "epoch": 0.99, "learning_rate": 3.782845878451955e-05, "loss": 0.7457, "step": 18425 }, { "epoch": 0.99, "learning_rate": 3.7822447582563505e-05, "loss": 0.7722, "step": 18430 }, { "epoch": 0.99, "learning_rate": 3.781643537447158e-05, "loss": 0.5666, "step": 18435 }, { "epoch": 0.99, "learning_rate": 3.781042216071555e-05, "loss": 0.7349, "step": 18440 }, { "epoch": 0.99, "learning_rate": 3.780440794176727e-05, "loss": 0.6803, "step": 18445 }, { "epoch": 0.99, "learning_rate": 3.779839271809863e-05, "loss": 0.8977, "step": 18450 }, { "epoch": 0.99, "learning_rate": 3.779237649018163e-05, "loss": 0.712, "step": 18455 }, { "epoch": 0.99, "learning_rate": 3.7786359258488356e-05, "loss": 0.8406, "step": 18460 }, { "epoch": 0.99, "learning_rate": 3.778034102349095e-05, "loss": 0.6713, "step": 18465 }, { "epoch": 0.99, "learning_rate": 3.777432178566164e-05, "loss": 0.6923, "step": 18470 }, { "epoch": 0.99, "learning_rate": 3.776830154547275e-05, "loss": 0.8148, "step": 18475 }, { "epoch": 0.99, "learning_rate": 3.776228030339666e-05, "loss": 0.663, "step": 18480 }, { "epoch": 0.99, "learning_rate": 3.775625805990583e-05, "loss": 0.7632, "step": 18485 }, { "epoch": 0.99, "learning_rate": 3.775023481547282e-05, "loss": 0.6002, "step": 18490 }, { "epoch": 0.99, "learning_rate": 3.774421057057025e-05, "loss": 0.8781, "step": 18495 }, { "epoch": 0.99, "learning_rate": 3.7738185325670815e-05, "loss": 0.585, "step": 18500 }, { "epoch": 0.99, "learning_rate": 3.773215908124731e-05, "loss": 0.793, "step": 18505 }, { "epoch": 0.99, "learning_rate": 3.772613183777258e-05, "loss": 0.7079, "step": 18510 }, { "epoch": 0.99, "learning_rate": 3.7720103595719586e-05, "loss": 0.8202, "step": 18515 }, { "epoch": 0.99, "learning_rate": 3.771407435556131e-05, "loss": 0.6826, "step": 18520 }, { "epoch": 0.99, "learning_rate": 3.770804411777088e-05, "loss": 0.7195, "step": 18525 }, { "epoch": 0.99, "learning_rate": 3.770201288282145e-05, "loss": 0.7974, "step": 18530 }, { "epoch": 0.99, "learning_rate": 3.769598065118628e-05, "loss": 0.6449, "step": 18535 }, { "epoch": 0.99, "learning_rate": 3.7689947423338686e-05, "loss": 0.7252, "step": 18540 }, { "epoch": 0.99, "learning_rate": 3.768391319975211e-05, "loss": 0.8655, "step": 18545 }, { "epoch": 0.99, "learning_rate": 3.7677877980900004e-05, "loss": 0.7962, "step": 18550 }, { "epoch": 0.99, "learning_rate": 3.7671841767255935e-05, "loss": 0.9109, "step": 18555 }, { "epoch": 0.99, "learning_rate": 3.766580455929355e-05, "loss": 0.802, "step": 18560 }, { "epoch": 0.99, "learning_rate": 3.765976635748658e-05, "loss": 0.7244, "step": 18565 }, { "epoch": 0.99, "learning_rate": 3.765372716230881e-05, "loss": 0.8154, "step": 18570 }, { "epoch": 0.99, "learning_rate": 3.7647686974234125e-05, "loss": 0.9034, "step": 18575 }, { "epoch": 0.99, "learning_rate": 3.764164579373647e-05, "loss": 0.7674, "step": 18580 }, { "epoch": 0.99, "learning_rate": 3.763560362128989e-05, "loss": 0.79, "step": 18585 }, { "epoch": 0.99, "learning_rate": 3.762956045736848e-05, "loss": 0.7604, "step": 18590 }, { "epoch": 0.99, "learning_rate": 3.762351630244643e-05, "loss": 0.5801, "step": 18595 }, { "epoch": 1.0, "learning_rate": 3.7617471156998015e-05, "loss": 0.7421, "step": 18600 }, { "epoch": 1.0, "learning_rate": 3.761142502149758e-05, "loss": 0.6293, "step": 18605 }, { "epoch": 1.0, "learning_rate": 3.760537789641952e-05, "loss": 0.761, "step": 18610 }, { "epoch": 1.0, "learning_rate": 3.7599329782238366e-05, "loss": 0.7081, "step": 18615 }, { "epoch": 1.0, "learning_rate": 3.759328067942867e-05, "loss": 0.6849, "step": 18620 }, { "epoch": 1.0, "learning_rate": 3.7587230588465095e-05, "loss": 0.7673, "step": 18625 }, { "epoch": 1.0, "learning_rate": 3.7581179509822376e-05, "loss": 0.6882, "step": 18630 }, { "epoch": 1.0, "learning_rate": 3.7575127443975314e-05, "loss": 0.6605, "step": 18635 }, { "epoch": 1.0, "learning_rate": 3.75690743913988e-05, "loss": 0.6872, "step": 18640 }, { "epoch": 1.0, "learning_rate": 3.756302035256779e-05, "loss": 0.9828, "step": 18645 }, { "epoch": 1.0, "learning_rate": 3.755696532795733e-05, "loss": 0.7231, "step": 18650 }, { "epoch": 1.0, "learning_rate": 3.7550909318042547e-05, "loss": 0.8458, "step": 18655 }, { "epoch": 1.0, "learning_rate": 3.754485232329862e-05, "loss": 0.6933, "step": 18660 }, { "epoch": 1.0, "learning_rate": 3.7538794344200834e-05, "loss": 0.7562, "step": 18665 }, { "epoch": 1.0, "learning_rate": 3.753273538122453e-05, "loss": 0.7589, "step": 18670 }, { "epoch": 1.0, "learning_rate": 3.7526675434845146e-05, "loss": 0.8413, "step": 18675 }, { "epoch": 1.0, "learning_rate": 3.752061450553816e-05, "loss": 0.6979, "step": 18680 }, { "epoch": 1.0, "learning_rate": 3.7514552593779193e-05, "loss": 0.7118, "step": 18685 }, { "epoch": 1.0, "learning_rate": 3.750848970004388e-05, "loss": 0.6427, "step": 18690 }, { "epoch": 1.0, "learning_rate": 3.7502425824807965e-05, "loss": 0.7137, "step": 18695 }, { "epoch": 1.0, "learning_rate": 3.749636096854724e-05, "loss": 0.7334, "step": 18700 }, { "epoch": 1.0, "learning_rate": 3.749029513173763e-05, "loss": 0.6886, "step": 18705 }, { "epoch": 1.0, "learning_rate": 3.748422831485507e-05, "loss": 0.7472, "step": 18710 }, { "epoch": 1.0, "learning_rate": 3.747816051837561e-05, "loss": 0.7642, "step": 18715 }, { "epoch": 1.0, "learning_rate": 3.7472091742775394e-05, "loss": 0.7597, "step": 18720 }, { "epoch": 1.0, "learning_rate": 3.746602198853059e-05, "loss": 0.7698, "step": 18725 }, { "epoch": 1.0, "learning_rate": 3.7459951256117484e-05, "loss": 0.8039, "step": 18730 }, { "epoch": 1.0, "learning_rate": 3.745387954601243e-05, "loss": 0.7821, "step": 18735 }, { "epoch": 1.0, "learning_rate": 3.7447806858691835e-05, "loss": 0.5738, "step": 18740 }, { "epoch": 1.0, "learning_rate": 3.744173319463224e-05, "loss": 0.8147, "step": 18745 }, { "epoch": 1.0, "learning_rate": 3.7435658554310195e-05, "loss": 0.6916, "step": 18750 }, { "epoch": 1.0, "learning_rate": 3.7429582938202365e-05, "loss": 0.633, "step": 18755 }, { "epoch": 1.0, "learning_rate": 3.742350634678549e-05, "loss": 0.6888, "step": 18760 }, { "epoch": 1.0, "learning_rate": 3.741742878053637e-05, "loss": 0.6898, "step": 18765 }, { "epoch": 1.0, "learning_rate": 3.741135023993191e-05, "loss": 0.7246, "step": 18770 }, { "epoch": 1.0, "learning_rate": 3.7405270725449056e-05, "loss": 0.7306, "step": 18775 }, { "epoch": 1.0, "learning_rate": 3.739919023756485e-05, "loss": 0.8678, "step": 18780 }, { "epoch": 1.0, "learning_rate": 3.7393108776756414e-05, "loss": 0.8372, "step": 18785 }, { "epoch": 1.01, "learning_rate": 3.7387026343500934e-05, "loss": 0.6038, "step": 18790 }, { "epoch": 1.01, "learning_rate": 3.738094293827569e-05, "loss": 0.7692, "step": 18795 }, { "epoch": 1.01, "learning_rate": 3.737485856155802e-05, "loss": 0.7382, "step": 18800 }, { "epoch": 1.01, "learning_rate": 3.736877321382534e-05, "loss": 0.8407, "step": 18805 }, { "epoch": 1.01, "learning_rate": 3.736268689555516e-05, "loss": 0.7166, "step": 18810 }, { "epoch": 1.01, "learning_rate": 3.7356599607225036e-05, "loss": 0.7035, "step": 18815 }, { "epoch": 1.01, "learning_rate": 3.735051134931263e-05, "loss": 0.693, "step": 18820 }, { "epoch": 1.01, "learning_rate": 3.7344422122295675e-05, "loss": 0.7892, "step": 18825 }, { "epoch": 1.01, "learning_rate": 3.7338331926651957e-05, "loss": 0.6281, "step": 18830 }, { "epoch": 1.01, "learning_rate": 3.733224076285936e-05, "loss": 0.6696, "step": 18835 }, { "epoch": 1.01, "learning_rate": 3.732614863139585e-05, "loss": 0.748, "step": 18840 }, { "epoch": 1.01, "learning_rate": 3.7320055532739426e-05, "loss": 0.7578, "step": 18845 }, { "epoch": 1.01, "learning_rate": 3.7313961467368225e-05, "loss": 0.7734, "step": 18850 }, { "epoch": 1.01, "learning_rate": 3.730786643576042e-05, "loss": 0.7208, "step": 18855 }, { "epoch": 1.01, "learning_rate": 3.730177043839426e-05, "loss": 0.624, "step": 18860 }, { "epoch": 1.01, "learning_rate": 3.7295673475748085e-05, "loss": 0.631, "step": 18865 }, { "epoch": 1.01, "learning_rate": 3.72895755483003e-05, "loss": 0.7402, "step": 18870 }, { "epoch": 1.01, "learning_rate": 3.7283476656529394e-05, "loss": 0.7745, "step": 18875 }, { "epoch": 1.01, "learning_rate": 3.727737680091392e-05, "loss": 0.7307, "step": 18880 }, { "epoch": 1.01, "learning_rate": 3.727127598193252e-05, "loss": 0.6934, "step": 18885 }, { "epoch": 1.01, "learning_rate": 3.726517420006391e-05, "loss": 0.706, "step": 18890 }, { "epoch": 1.01, "learning_rate": 3.725907145578687e-05, "loss": 0.5992, "step": 18895 }, { "epoch": 1.01, "learning_rate": 3.725296774958026e-05, "loss": 0.7091, "step": 18900 }, { "epoch": 1.01, "learning_rate": 3.7246863081923024e-05, "loss": 0.6441, "step": 18905 }, { "epoch": 1.01, "learning_rate": 3.7240757453294174e-05, "loss": 0.8, "step": 18910 }, { "epoch": 1.01, "learning_rate": 3.7234650864172795e-05, "loss": 0.6456, "step": 18915 }, { "epoch": 1.01, "learning_rate": 3.722854331503806e-05, "loss": 0.7247, "step": 18920 }, { "epoch": 1.01, "learning_rate": 3.72224348063692e-05, "loss": 0.6949, "step": 18925 }, { "epoch": 1.01, "learning_rate": 3.721632533864553e-05, "loss": 0.6897, "step": 18930 }, { "epoch": 1.01, "learning_rate": 3.721021491234644e-05, "loss": 0.6947, "step": 18935 }, { "epoch": 1.01, "learning_rate": 3.720410352795141e-05, "loss": 0.9082, "step": 18940 }, { "epoch": 1.01, "learning_rate": 3.719799118593996e-05, "loss": 0.7506, "step": 18945 }, { "epoch": 1.01, "learning_rate": 3.719187788679172e-05, "loss": 0.6098, "step": 18950 }, { "epoch": 1.01, "learning_rate": 3.718576363098637e-05, "loss": 0.6518, "step": 18955 }, { "epoch": 1.01, "learning_rate": 3.717964841900369e-05, "loss": 0.683, "step": 18960 }, { "epoch": 1.01, "learning_rate": 3.71735322513235e-05, "loss": 0.5155, "step": 18965 }, { "epoch": 1.01, "learning_rate": 3.716741512842573e-05, "loss": 0.7194, "step": 18970 }, { "epoch": 1.02, "learning_rate": 3.716129705079037e-05, "loss": 0.8037, "step": 18975 }, { "epoch": 1.02, "learning_rate": 3.715517801889749e-05, "loss": 0.7316, "step": 18980 }, { "epoch": 1.02, "learning_rate": 3.714905803322722e-05, "loss": 0.5391, "step": 18985 }, { "epoch": 1.02, "learning_rate": 3.714293709425978e-05, "loss": 0.7811, "step": 18990 }, { "epoch": 1.02, "learning_rate": 3.713681520247546e-05, "loss": 0.7145, "step": 18995 }, { "epoch": 1.02, "learning_rate": 3.713069235835463e-05, "loss": 0.7411, "step": 19000 }, { "epoch": 1.02, "learning_rate": 3.712456856237772e-05, "loss": 0.7058, "step": 19005 }, { "epoch": 1.02, "learning_rate": 3.711844381502525e-05, "loss": 0.7548, "step": 19010 }, { "epoch": 1.02, "learning_rate": 3.711231811677781e-05, "loss": 0.8202, "step": 19015 }, { "epoch": 1.02, "learning_rate": 3.710619146811606e-05, "loss": 0.7619, "step": 19020 }, { "epoch": 1.02, "learning_rate": 3.710006386952074e-05, "loss": 0.748, "step": 19025 }, { "epoch": 1.02, "learning_rate": 3.709393532147267e-05, "loss": 0.7685, "step": 19030 }, { "epoch": 1.02, "learning_rate": 3.7087805824452724e-05, "loss": 0.6781, "step": 19035 }, { "epoch": 1.02, "learning_rate": 3.708167537894187e-05, "loss": 0.6863, "step": 19040 }, { "epoch": 1.02, "learning_rate": 3.707554398542115e-05, "loss": 0.7509, "step": 19045 }, { "epoch": 1.02, "learning_rate": 3.7069411644371666e-05, "loss": 0.786, "step": 19050 }, { "epoch": 1.02, "learning_rate": 3.70632783562746e-05, "loss": 0.6377, "step": 19055 }, { "epoch": 1.02, "learning_rate": 3.705714412161123e-05, "loss": 0.7119, "step": 19060 }, { "epoch": 1.02, "learning_rate": 3.705100894086286e-05, "loss": 0.5874, "step": 19065 }, { "epoch": 1.02, "learning_rate": 3.704487281451093e-05, "loss": 0.772, "step": 19070 }, { "epoch": 1.02, "learning_rate": 3.70387357430369e-05, "loss": 0.7138, "step": 19075 }, { "epoch": 1.02, "learning_rate": 3.703259772692233e-05, "loss": 0.7787, "step": 19080 }, { "epoch": 1.02, "learning_rate": 3.702645876664886e-05, "loss": 0.8605, "step": 19085 }, { "epoch": 1.02, "learning_rate": 3.702031886269818e-05, "loss": 0.6587, "step": 19090 }, { "epoch": 1.02, "learning_rate": 3.701417801555208e-05, "loss": 0.816, "step": 19095 }, { "epoch": 1.02, "learning_rate": 3.70080362256924e-05, "loss": 0.8239, "step": 19100 }, { "epoch": 1.02, "learning_rate": 3.700189349360109e-05, "loss": 0.7396, "step": 19105 }, { "epoch": 1.02, "learning_rate": 3.6995749819760116e-05, "loss": 0.7019, "step": 19110 }, { "epoch": 1.02, "learning_rate": 3.698960520465158e-05, "loss": 0.6598, "step": 19115 }, { "epoch": 1.02, "learning_rate": 3.6983459648757615e-05, "loss": 0.8095, "step": 19120 }, { "epoch": 1.02, "learning_rate": 3.697731315256046e-05, "loss": 0.7334, "step": 19125 }, { "epoch": 1.02, "learning_rate": 3.6971165716542386e-05, "loss": 0.669, "step": 19130 }, { "epoch": 1.02, "learning_rate": 3.696501734118577e-05, "loss": 0.6063, "step": 19135 }, { "epoch": 1.02, "learning_rate": 3.6958868026973075e-05, "loss": 0.7111, "step": 19140 }, { "epoch": 1.02, "learning_rate": 3.69527177743868e-05, "loss": 0.606, "step": 19145 }, { "epoch": 1.02, "learning_rate": 3.694656658390954e-05, "loss": 0.7113, "step": 19150 }, { "epoch": 1.02, "learning_rate": 3.694041445602394e-05, "loss": 0.6619, "step": 19155 }, { "epoch": 1.02, "learning_rate": 3.6934261391212775e-05, "loss": 0.7227, "step": 19160 }, { "epoch": 1.03, "learning_rate": 3.6928107389958826e-05, "loss": 0.7112, "step": 19165 }, { "epoch": 1.03, "learning_rate": 3.692195245274499e-05, "loss": 0.721, "step": 19170 }, { "epoch": 1.03, "learning_rate": 3.6915796580054226e-05, "loss": 0.6568, "step": 19175 }, { "epoch": 1.03, "learning_rate": 3.690963977236956e-05, "loss": 0.5884, "step": 19180 }, { "epoch": 1.03, "learning_rate": 3.690348203017409e-05, "loss": 0.668, "step": 19185 }, { "epoch": 1.03, "learning_rate": 3.6897323353951006e-05, "loss": 0.7781, "step": 19190 }, { "epoch": 1.03, "learning_rate": 3.6891163744183566e-05, "loss": 0.7888, "step": 19195 }, { "epoch": 1.03, "learning_rate": 3.688500320135507e-05, "loss": 0.585, "step": 19200 }, { "epoch": 1.03, "learning_rate": 3.6878841725948946e-05, "loss": 0.7379, "step": 19205 }, { "epoch": 1.03, "learning_rate": 3.6872679318448636e-05, "loss": 0.6995, "step": 19210 }, { "epoch": 1.03, "learning_rate": 3.686651597933771e-05, "loss": 0.7771, "step": 19215 }, { "epoch": 1.03, "learning_rate": 3.6860351709099754e-05, "loss": 0.6342, "step": 19220 }, { "epoch": 1.03, "learning_rate": 3.685418650821849e-05, "loss": 0.7564, "step": 19225 }, { "epoch": 1.03, "learning_rate": 3.6848020377177664e-05, "loss": 0.5561, "step": 19230 }, { "epoch": 1.03, "learning_rate": 3.684185331646112e-05, "loss": 0.7206, "step": 19235 }, { "epoch": 1.03, "learning_rate": 3.683568532655276e-05, "loss": 0.7525, "step": 19240 }, { "epoch": 1.03, "learning_rate": 3.682951640793657e-05, "loss": 0.7429, "step": 19245 }, { "epoch": 1.03, "learning_rate": 3.682334656109661e-05, "loss": 0.8073, "step": 19250 }, { "epoch": 1.03, "learning_rate": 3.6817175786517e-05, "loss": 0.6816, "step": 19255 }, { "epoch": 1.03, "learning_rate": 3.6811004084681944e-05, "loss": 0.6836, "step": 19260 }, { "epoch": 1.03, "learning_rate": 3.6804831456075704e-05, "loss": 0.7229, "step": 19265 }, { "epoch": 1.03, "learning_rate": 3.679865790118265e-05, "loss": 0.611, "step": 19270 }, { "epoch": 1.03, "learning_rate": 3.679248342048719e-05, "loss": 0.7383, "step": 19275 }, { "epoch": 1.03, "learning_rate": 3.67863080144738e-05, "loss": 0.6316, "step": 19280 }, { "epoch": 1.03, "learning_rate": 3.6780131683627066e-05, "loss": 0.6876, "step": 19285 }, { "epoch": 1.03, "learning_rate": 3.677395442843162e-05, "loss": 0.7016, "step": 19290 }, { "epoch": 1.03, "learning_rate": 3.676777624937216e-05, "loss": 0.6569, "step": 19295 }, { "epoch": 1.03, "learning_rate": 3.676159714693347e-05, "loss": 0.6753, "step": 19300 }, { "epoch": 1.03, "learning_rate": 3.675541712160042e-05, "loss": 0.7971, "step": 19305 }, { "epoch": 1.03, "learning_rate": 3.674923617385792e-05, "loss": 0.6965, "step": 19310 }, { "epoch": 1.03, "learning_rate": 3.674305430419097e-05, "loss": 0.7058, "step": 19315 }, { "epoch": 1.03, "learning_rate": 3.6736871513084656e-05, "loss": 0.715, "step": 19320 }, { "epoch": 1.03, "learning_rate": 3.673068780102411e-05, "loss": 0.6447, "step": 19325 }, { "epoch": 1.03, "learning_rate": 3.672450316849454e-05, "loss": 0.6567, "step": 19330 }, { "epoch": 1.03, "learning_rate": 3.6718317615981255e-05, "loss": 0.7121, "step": 19335 }, { "epoch": 1.03, "learning_rate": 3.6712131143969596e-05, "loss": 0.6963, "step": 19340 }, { "epoch": 1.03, "learning_rate": 3.6705943752945e-05, "loss": 0.8069, "step": 19345 }, { "epoch": 1.04, "learning_rate": 3.6699755443392983e-05, "loss": 0.6849, "step": 19350 }, { "epoch": 1.04, "learning_rate": 3.669356621579911e-05, "loss": 0.7365, "step": 19355 }, { "epoch": 1.04, "learning_rate": 3.6687376070649024e-05, "loss": 0.6641, "step": 19360 }, { "epoch": 1.04, "learning_rate": 3.668118500842846e-05, "loss": 0.6893, "step": 19365 }, { "epoch": 1.04, "learning_rate": 3.6674993029623207e-05, "loss": 0.7242, "step": 19370 }, { "epoch": 1.04, "learning_rate": 3.666880013471913e-05, "loss": 0.678, "step": 19375 }, { "epoch": 1.04, "learning_rate": 3.6662606324202145e-05, "loss": 0.7119, "step": 19380 }, { "epoch": 1.04, "learning_rate": 3.665641159855829e-05, "loss": 0.6684, "step": 19385 }, { "epoch": 1.04, "learning_rate": 3.665021595827364e-05, "loss": 0.6933, "step": 19390 }, { "epoch": 1.04, "learning_rate": 3.664401940383433e-05, "loss": 0.6098, "step": 19395 }, { "epoch": 1.04, "learning_rate": 3.663782193572659e-05, "loss": 0.7141, "step": 19400 }, { "epoch": 1.04, "learning_rate": 3.6631623554436725e-05, "loss": 0.5819, "step": 19405 }, { "epoch": 1.04, "learning_rate": 3.6625424260451094e-05, "loss": 0.8023, "step": 19410 }, { "epoch": 1.04, "learning_rate": 3.6619224054256135e-05, "loss": 0.7077, "step": 19415 }, { "epoch": 1.04, "learning_rate": 3.661302293633836e-05, "loss": 0.8241, "step": 19420 }, { "epoch": 1.04, "learning_rate": 3.660682090718435e-05, "loss": 0.721, "step": 19425 }, { "epoch": 1.04, "learning_rate": 3.6600617967280756e-05, "loss": 0.7919, "step": 19430 }, { "epoch": 1.04, "learning_rate": 3.6594414117114314e-05, "loss": 0.7374, "step": 19435 }, { "epoch": 1.04, "learning_rate": 3.65882093571718e-05, "loss": 0.9226, "step": 19440 }, { "epoch": 1.04, "learning_rate": 3.65820036879401e-05, "loss": 0.6172, "step": 19445 }, { "epoch": 1.04, "learning_rate": 3.657579710990614e-05, "loss": 0.7177, "step": 19450 }, { "epoch": 1.04, "learning_rate": 3.656958962355693e-05, "loss": 0.5644, "step": 19455 }, { "epoch": 1.04, "learning_rate": 3.6563381229379576e-05, "loss": 0.6877, "step": 19460 }, { "epoch": 1.04, "learning_rate": 3.655717192786119e-05, "loss": 0.7084, "step": 19465 }, { "epoch": 1.04, "learning_rate": 3.655096171948903e-05, "loss": 0.7819, "step": 19470 }, { "epoch": 1.04, "learning_rate": 3.654475060475037e-05, "loss": 0.8572, "step": 19475 }, { "epoch": 1.04, "learning_rate": 3.653853858413259e-05, "loss": 0.6891, "step": 19480 }, { "epoch": 1.04, "learning_rate": 3.653232565812311e-05, "loss": 0.6481, "step": 19485 }, { "epoch": 1.04, "learning_rate": 3.652611182720946e-05, "loss": 0.6192, "step": 19490 }, { "epoch": 1.04, "learning_rate": 3.651989709187921e-05, "loss": 0.7298, "step": 19495 }, { "epoch": 1.04, "learning_rate": 3.6513681452619994e-05, "loss": 0.7542, "step": 19500 }, { "epoch": 1.04, "learning_rate": 3.650746490991956e-05, "loss": 0.7275, "step": 19505 }, { "epoch": 1.04, "learning_rate": 3.6501247464265674e-05, "loss": 0.7142, "step": 19510 }, { "epoch": 1.04, "learning_rate": 3.649502911614623e-05, "loss": 0.7425, "step": 19515 }, { "epoch": 1.04, "learning_rate": 3.6488809866049135e-05, "loss": 0.7094, "step": 19520 }, { "epoch": 1.04, "learning_rate": 3.64825897144624e-05, "loss": 0.7883, "step": 19525 }, { "epoch": 1.04, "learning_rate": 3.64763686618741e-05, "loss": 0.6493, "step": 19530 }, { "epoch": 1.05, "learning_rate": 3.6470146708772384e-05, "loss": 0.6927, "step": 19535 }, { "epoch": 1.05, "learning_rate": 3.646392385564547e-05, "loss": 0.5466, "step": 19540 }, { "epoch": 1.05, "learning_rate": 3.645770010298164e-05, "loss": 0.6321, "step": 19545 }, { "epoch": 1.05, "learning_rate": 3.645147545126926e-05, "loss": 0.8171, "step": 19550 }, { "epoch": 1.05, "learning_rate": 3.644524990099675e-05, "loss": 0.5653, "step": 19555 }, { "epoch": 1.05, "learning_rate": 3.64390234526526e-05, "loss": 0.7726, "step": 19560 }, { "epoch": 1.05, "learning_rate": 3.6432796106725396e-05, "loss": 0.6603, "step": 19565 }, { "epoch": 1.05, "learning_rate": 3.642656786370378e-05, "loss": 0.6843, "step": 19570 }, { "epoch": 1.05, "learning_rate": 3.642033872407645e-05, "loss": 0.6938, "step": 19575 }, { "epoch": 1.05, "learning_rate": 3.6414108688332186e-05, "loss": 0.677, "step": 19580 }, { "epoch": 1.05, "learning_rate": 3.640787775695985e-05, "loss": 0.6901, "step": 19585 }, { "epoch": 1.05, "learning_rate": 3.6401645930448356e-05, "loss": 0.671, "step": 19590 }, { "epoch": 1.05, "learning_rate": 3.6395413209286686e-05, "loss": 0.741, "step": 19595 }, { "epoch": 1.05, "learning_rate": 3.6389179593963914e-05, "loss": 0.8225, "step": 19600 }, { "epoch": 1.05, "learning_rate": 3.6382945084969165e-05, "loss": 0.715, "step": 19605 }, { "epoch": 1.05, "learning_rate": 3.637670968279165e-05, "loss": 0.6918, "step": 19610 }, { "epoch": 1.05, "learning_rate": 3.637047338792063e-05, "loss": 0.6316, "step": 19615 }, { "epoch": 1.05, "learning_rate": 3.6364236200845456e-05, "loss": 0.7644, "step": 19620 }, { "epoch": 1.05, "learning_rate": 3.635799812205554e-05, "loss": 0.659, "step": 19625 }, { "epoch": 1.05, "learning_rate": 3.6351759152040355e-05, "loss": 0.6877, "step": 19630 }, { "epoch": 1.05, "learning_rate": 3.634551929128945e-05, "loss": 0.7445, "step": 19635 }, { "epoch": 1.05, "learning_rate": 3.633927854029246e-05, "loss": 0.7579, "step": 19640 }, { "epoch": 1.05, "learning_rate": 3.633303689953907e-05, "loss": 0.8384, "step": 19645 }, { "epoch": 1.05, "learning_rate": 3.6326794369519034e-05, "loss": 0.6814, "step": 19650 }, { "epoch": 1.05, "learning_rate": 3.6320550950722197e-05, "loss": 0.7199, "step": 19655 }, { "epoch": 1.05, "learning_rate": 3.631430664363846e-05, "loss": 0.7483, "step": 19660 }, { "epoch": 1.05, "learning_rate": 3.6308061448757776e-05, "loss": 0.5444, "step": 19665 }, { "epoch": 1.05, "learning_rate": 3.630181536657019e-05, "loss": 0.7758, "step": 19670 }, { "epoch": 1.05, "learning_rate": 3.629556839756583e-05, "loss": 0.7804, "step": 19675 }, { "epoch": 1.05, "learning_rate": 3.628932054223486e-05, "loss": 0.7373, "step": 19680 }, { "epoch": 1.05, "learning_rate": 3.6283071801067524e-05, "loss": 0.6756, "step": 19685 }, { "epoch": 1.05, "learning_rate": 3.6276822174554156e-05, "loss": 0.6399, "step": 19690 }, { "epoch": 1.05, "learning_rate": 3.627057166318514e-05, "loss": 0.7184, "step": 19695 }, { "epoch": 1.05, "learning_rate": 3.626432026745092e-05, "loss": 0.7802, "step": 19700 }, { "epoch": 1.05, "learning_rate": 3.6258067987842045e-05, "loss": 0.8092, "step": 19705 }, { "epoch": 1.05, "learning_rate": 3.625181482484908e-05, "loss": 0.7534, "step": 19710 }, { "epoch": 1.05, "learning_rate": 3.624556077896273e-05, "loss": 0.6718, "step": 19715 }, { "epoch": 1.05, "learning_rate": 3.62393058506737e-05, "loss": 0.5859, "step": 19720 }, { "epoch": 1.06, "learning_rate": 3.62330500404728e-05, "loss": 0.7219, "step": 19725 }, { "epoch": 1.06, "learning_rate": 3.622679334885091e-05, "loss": 0.7059, "step": 19730 }, { "epoch": 1.06, "learning_rate": 3.622053577629896e-05, "loss": 0.6691, "step": 19735 }, { "epoch": 1.06, "learning_rate": 3.621427732330797e-05, "loss": 0.6074, "step": 19740 }, { "epoch": 1.06, "learning_rate": 3.6208017990369024e-05, "loss": 0.7223, "step": 19745 }, { "epoch": 1.06, "learning_rate": 3.620175777797328e-05, "loss": 0.6325, "step": 19750 }, { "epoch": 1.06, "learning_rate": 3.6195496686611926e-05, "loss": 0.7658, "step": 19755 }, { "epoch": 1.06, "learning_rate": 3.618923471677626e-05, "loss": 0.6585, "step": 19760 }, { "epoch": 1.06, "learning_rate": 3.618297186895767e-05, "loss": 0.6261, "step": 19765 }, { "epoch": 1.06, "learning_rate": 3.617670814364754e-05, "loss": 0.7079, "step": 19770 }, { "epoch": 1.06, "learning_rate": 3.6170443541337394e-05, "loss": 0.7239, "step": 19775 }, { "epoch": 1.06, "learning_rate": 3.616417806251877e-05, "loss": 0.6381, "step": 19780 }, { "epoch": 1.06, "learning_rate": 3.615791170768332e-05, "loss": 0.6819, "step": 19785 }, { "epoch": 1.06, "learning_rate": 3.615164447732274e-05, "loss": 0.6381, "step": 19790 }, { "epoch": 1.06, "learning_rate": 3.614537637192879e-05, "loss": 0.7217, "step": 19795 }, { "epoch": 1.06, "learning_rate": 3.6139107391993316e-05, "loss": 0.7405, "step": 19800 }, { "epoch": 1.06, "learning_rate": 3.6132837538008225e-05, "loss": 0.6009, "step": 19805 }, { "epoch": 1.06, "learning_rate": 3.6126566810465486e-05, "loss": 0.7134, "step": 19810 }, { "epoch": 1.06, "learning_rate": 3.612029520985715e-05, "loss": 0.7042, "step": 19815 }, { "epoch": 1.06, "learning_rate": 3.6114022736675315e-05, "loss": 0.7595, "step": 19820 }, { "epoch": 1.06, "learning_rate": 3.6107749391412184e-05, "loss": 0.795, "step": 19825 }, { "epoch": 1.06, "learning_rate": 3.610147517455999e-05, "loss": 0.6147, "step": 19830 }, { "epoch": 1.06, "learning_rate": 3.609520008661105e-05, "loss": 0.7308, "step": 19835 }, { "epoch": 1.06, "learning_rate": 3.6088924128057766e-05, "loss": 0.6944, "step": 19840 }, { "epoch": 1.06, "learning_rate": 3.608264729939257e-05, "loss": 0.5859, "step": 19845 }, { "epoch": 1.06, "learning_rate": 3.6076369601108e-05, "loss": 0.5451, "step": 19850 }, { "epoch": 1.06, "learning_rate": 3.607009103369663e-05, "loss": 0.7287, "step": 19855 }, { "epoch": 1.06, "learning_rate": 3.606381159765115e-05, "loss": 0.7377, "step": 19860 }, { "epoch": 1.06, "learning_rate": 3.605753129346425e-05, "loss": 0.7201, "step": 19865 }, { "epoch": 1.06, "learning_rate": 3.605125012162875e-05, "loss": 0.5411, "step": 19870 }, { "epoch": 1.06, "learning_rate": 3.604496808263751e-05, "loss": 0.6901, "step": 19875 }, { "epoch": 1.06, "learning_rate": 3.6038685176983445e-05, "loss": 0.6943, "step": 19880 }, { "epoch": 1.06, "learning_rate": 3.603240140515957e-05, "loss": 0.7507, "step": 19885 }, { "epoch": 1.06, "learning_rate": 3.6026116767658954e-05, "loss": 0.7565, "step": 19890 }, { "epoch": 1.06, "learning_rate": 3.601983126497472e-05, "loss": 0.6515, "step": 19895 }, { "epoch": 1.06, "learning_rate": 3.601354489760008e-05, "loss": 0.6443, "step": 19900 }, { "epoch": 1.06, "learning_rate": 3.600725766602831e-05, "loss": 0.722, "step": 19905 }, { "epoch": 1.07, "learning_rate": 3.600096957075273e-05, "loss": 0.6648, "step": 19910 }, { "epoch": 1.07, "learning_rate": 3.5994680612266756e-05, "loss": 0.6729, "step": 19915 }, { "epoch": 1.07, "learning_rate": 3.598839079106387e-05, "loss": 0.7067, "step": 19920 }, { "epoch": 1.07, "learning_rate": 3.598210010763761e-05, "loss": 0.5048, "step": 19925 }, { "epoch": 1.07, "learning_rate": 3.597580856248157e-05, "loss": 0.7746, "step": 19930 }, { "epoch": 1.07, "learning_rate": 3.596951615608945e-05, "loss": 0.6867, "step": 19935 }, { "epoch": 1.07, "learning_rate": 3.596322288895498e-05, "loss": 0.8095, "step": 19940 }, { "epoch": 1.07, "learning_rate": 3.5956928761571976e-05, "loss": 0.5721, "step": 19945 }, { "epoch": 1.07, "learning_rate": 3.595063377443433e-05, "loss": 0.539, "step": 19950 }, { "epoch": 1.07, "learning_rate": 3.5944337928035964e-05, "loss": 0.5423, "step": 19955 }, { "epoch": 1.07, "learning_rate": 3.593804122287091e-05, "loss": 0.7047, "step": 19960 }, { "epoch": 1.07, "learning_rate": 3.5931743659433253e-05, "loss": 0.6379, "step": 19965 }, { "epoch": 1.07, "learning_rate": 3.592544523821712e-05, "loss": 0.671, "step": 19970 }, { "epoch": 1.07, "learning_rate": 3.5919145959716765e-05, "loss": 0.6506, "step": 19975 }, { "epoch": 1.07, "learning_rate": 3.591284582442644e-05, "loss": 0.725, "step": 19980 }, { "epoch": 1.07, "learning_rate": 3.59065448328405e-05, "loss": 0.9172, "step": 19985 }, { "epoch": 1.07, "learning_rate": 3.590024298545338e-05, "loss": 0.6289, "step": 19990 }, { "epoch": 1.07, "learning_rate": 3.5893940282759555e-05, "loss": 0.7067, "step": 19995 }, { "epoch": 1.07, "learning_rate": 3.5887636725253574e-05, "loss": 0.6789, "step": 20000 } ], "logging_steps": 5, "max_steps": 56076, "num_train_epochs": 3, "save_steps": 500, "total_flos": 2.3139803180942623e+18, "trial_name": null, "trial_params": null }