diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,24019 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.069933530379425, + "eval_steps": 500, + "global_step": 20000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999999901916556e-05, + "loss": 1.7897, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999996076662294e-05, + "loss": 1.8861, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999117249044e-05, + "loss": 1.4411, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998430665038e-05, + "loss": 1.4184, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999975479142666e-05, + "loss": 1.2592, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996468996798e-05, + "loss": 1.223, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995193912717e-05, + "loss": 1.2161, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993722662123e-05, + "loss": 1.2416, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999920552451324e-05, + "loss": 1.0517, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999901916618755e-05, + "loss": 1.2388, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988131912499e-05, + "loss": 1.2804, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985875997164e-05, + "loss": 1.0287, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983423916048e-05, + "loss": 1.1188, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980775669344e-05, + "loss": 1.1104, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999779312572584e-05, + "loss": 1.157, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974890680015e-05, + "loss": 1.1552, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 4.999971653937852e-05, + "loss": 1.1373, + "step": 85 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999682210310237e-05, + "loss": 1.2307, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999645919598e-05, + "loss": 0.9955, + "step": 95 + }, + { + "epoch": 0.01, + "learning_rate": 4.999960766724465e-05, + "loss": 0.9544, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999956745325318e-05, + "loss": 1.0432, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 4.999952527762677e-05, + "loss": 1.1078, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999948114036871e-05, + "loss": 1.0228, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999435041482466e-05, + "loss": 1.0644, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.999938698097166e-05, + "loss": 1.0653, + "step": 125 + }, + { + "epoch": 0.01, + "learning_rate": 4.999933695884007e-05, + "loss": 1.0642, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.99992849750916e-05, + "loss": 0.9661, + "step": 135 + }, + { + "epoch": 0.01, + "learning_rate": 4.999923102973034e-05, + "loss": 1.0496, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999917512276053e-05, + "loss": 0.9529, + "step": 145 + }, + { + "epoch": 0.01, + "learning_rate": 4.999911725418655e-05, + "loss": 0.999, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905742401294e-05, + "loss": 1.023, + "step": 155 + }, + { + "epoch": 0.01, + "learning_rate": 4.999899563224439e-05, + "loss": 1.0026, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999893187888577e-05, + "loss": 1.0805, + "step": 165 + }, + { + "epoch": 0.01, + "learning_rate": 4.9998866163942055e-05, + "loss": 1.1233, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.999879848741842e-05, + "loss": 1.1635, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 4.999872884932016e-05, + "loss": 0.9927, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999865724965276e-05, + "loss": 1.096, + "step": 185 + }, + { + "epoch": 0.01, + "learning_rate": 4.999858368842182e-05, + "loss": 1.0914, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999850816563312e-05, + "loss": 0.9758, + "step": 195 + }, + { + "epoch": 0.01, + "learning_rate": 4.999843068129258e-05, + "loss": 0.9528, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.9998351235406284e-05, + "loss": 1.033, + "step": 205 + }, + { + "epoch": 0.01, + "learning_rate": 4.999826982798047e-05, + "loss": 1.2451, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 4.999818645902152e-05, + "loss": 0.9208, + "step": 215 + }, + { + "epoch": 0.01, + "learning_rate": 4.9998101128535984e-05, + "loss": 0.9928, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 4.999801383653055e-05, + "loss": 0.8492, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.999792458301207e-05, + "loss": 0.8356, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 4.999783336798754e-05, + "loss": 0.8712, + "step": 235 + }, + { + "epoch": 0.01, + "learning_rate": 4.999774019146413e-05, + "loss": 1.1794, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 4.999764505344914e-05, + "loss": 0.8604, + "step": 245 + }, + { + "epoch": 0.01, + "learning_rate": 4.999754795395004e-05, + "loss": 1.0337, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 4.999744889297445e-05, + "loss": 0.9918, + "step": 255 + }, + { + "epoch": 0.01, + "learning_rate": 4.999734787053014e-05, + "loss": 0.9943, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 4.999724488662505e-05, + "loss": 1.0025, + "step": 265 + }, + { + "epoch": 0.01, + "learning_rate": 4.999713994126724e-05, + "loss": 1.0377, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 4.999703303446496e-05, + "loss": 0.7867, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 4.999692416622659e-05, + "loss": 1.0016, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.999681333656068e-05, + "loss": 0.8957, + "step": 285 + }, + { + "epoch": 0.02, + "learning_rate": 4.999670054547592e-05, + "loss": 0.9476, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.999658579298116e-05, + "loss": 0.8151, + "step": 295 + }, + { + "epoch": 0.02, + "learning_rate": 4.99964690790854e-05, + "loss": 1.0061, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.999635040379782e-05, + "loss": 0.9179, + "step": 305 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622976712771e-05, + "loss": 1.0512, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.999610716908455e-05, + "loss": 0.9141, + "step": 315 + }, + { + "epoch": 0.02, + "learning_rate": 4.999598260967795e-05, + "loss": 0.8681, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.999585608891768e-05, + "loss": 0.8928, + "step": 325 + }, + { + "epoch": 0.02, + "learning_rate": 4.999572760681368e-05, + "loss": 0.9659, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9995597163376025e-05, + "loss": 1.1282, + "step": 335 + }, + { + "epoch": 0.02, + "learning_rate": 4.999546475861495e-05, + "loss": 1.0277, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.9995330392540846e-05, + "loss": 0.8917, + "step": 345 + }, + { + "epoch": 0.02, + "learning_rate": 4.999519406516426e-05, + "loss": 0.9693, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 4.999505577649588e-05, + "loss": 0.9954, + "step": 355 + }, + { + "epoch": 0.02, + "learning_rate": 4.9994915526546565e-05, + "loss": 0.7738, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 4.999477331532732e-05, + "loss": 0.9392, + "step": 365 + }, + { + "epoch": 0.02, + "learning_rate": 4.999462914284929e-05, + "loss": 0.9293, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 4.9994483009123816e-05, + "loss": 1.0134, + "step": 375 + }, + { + "epoch": 0.02, + "learning_rate": 4.999433491416233e-05, + "loss": 0.9113, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 4.9994184857976484e-05, + "loss": 0.97, + "step": 385 + }, + { + "epoch": 0.02, + "learning_rate": 4.999403284057803e-05, + "loss": 0.8572, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993878861978914e-05, + "loss": 0.7934, + "step": 395 + }, + { + "epoch": 0.02, + "learning_rate": 4.99937229221912e-05, + "loss": 1.1504, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 4.999356502122714e-05, + "loss": 0.971, + "step": 405 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993405159099115e-05, + "loss": 0.7483, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 4.999324333581967e-05, + "loss": 0.8933, + "step": 415 + }, + { + "epoch": 0.02, + "learning_rate": 4.99930795514015e-05, + "loss": 0.8414, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 4.9992913805857465e-05, + "loss": 0.991, + "step": 425 + }, + { + "epoch": 0.02, + "learning_rate": 4.999274609920056e-05, + "loss": 1.0806, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 4.999257643144396e-05, + "loss": 1.0021, + "step": 435 + }, + { + "epoch": 0.02, + "learning_rate": 4.9992404802600963e-05, + "loss": 0.9346, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 4.999223121268504e-05, + "loss": 0.8399, + "step": 445 + }, + { + "epoch": 0.02, + "learning_rate": 4.999205566170981e-05, + "loss": 1.0498, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 4.999187814968906e-05, + "loss": 0.8322, + "step": 455 + }, + { + "epoch": 0.02, + "learning_rate": 4.999169867663671e-05, + "loss": 0.768, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 4.999151724256684e-05, + "loss": 0.9191, + "step": 465 + }, + { + "epoch": 0.03, + "learning_rate": 4.999133384749369e-05, + "loss": 0.8601, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.999114849143165e-05, + "loss": 0.9834, + "step": 475 + }, + { + "epoch": 0.03, + "learning_rate": 4.999096117439527e-05, + "loss": 1.0768, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.999077189639924e-05, + "loss": 0.8655, + "step": 485 + }, + { + "epoch": 0.03, + "learning_rate": 4.999058065745841e-05, + "loss": 1.01, + "step": 490 + }, + { + "epoch": 0.03, + "learning_rate": 4.999038745758779e-05, + "loss": 1.0327, + "step": 495 + }, + { + "epoch": 0.03, + "learning_rate": 4.999019229680254e-05, + "loss": 0.8839, + "step": 500 + }, + { + "epoch": 0.03, + "learning_rate": 4.998999517511798e-05, + "loss": 1.0411, + "step": 505 + }, + { + "epoch": 0.03, + "learning_rate": 4.998979609254957e-05, + "loss": 0.8918, + "step": 510 + }, + { + "epoch": 0.03, + "learning_rate": 4.998959504911293e-05, + "loss": 0.8533, + "step": 515 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989392044823836e-05, + "loss": 0.9949, + "step": 520 + }, + { + "epoch": 0.03, + "learning_rate": 4.998918707969822e-05, + "loss": 0.9551, + "step": 525 + }, + { + "epoch": 0.03, + "learning_rate": 4.9988980153752164e-05, + "loss": 1.0028, + "step": 530 + }, + { + "epoch": 0.03, + "learning_rate": 4.998877126700191e-05, + "loss": 0.9293, + "step": 535 + }, + { + "epoch": 0.03, + "learning_rate": 4.9988560419463836e-05, + "loss": 0.9163, + "step": 540 + }, + { + "epoch": 0.03, + "learning_rate": 4.9988347611154504e-05, + "loss": 0.9049, + "step": 545 + }, + { + "epoch": 0.03, + "learning_rate": 4.9988132842090596e-05, + "loss": 0.9456, + "step": 550 + }, + { + "epoch": 0.03, + "learning_rate": 4.998791611228897e-05, + "loss": 0.9466, + "step": 555 + }, + { + "epoch": 0.03, + "learning_rate": 4.998769742176663e-05, + "loss": 0.9389, + "step": 560 + }, + { + "epoch": 0.03, + "learning_rate": 4.998747677054074e-05, + "loss": 0.8853, + "step": 565 + }, + { + "epoch": 0.03, + "learning_rate": 4.998725415862861e-05, + "loss": 0.797, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 4.998702958604772e-05, + "loss": 1.0129, + "step": 575 + }, + { + "epoch": 0.03, + "learning_rate": 4.998680305281568e-05, + "loss": 0.87, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 4.998657455895026e-05, + "loss": 0.9552, + "step": 585 + }, + { + "epoch": 0.03, + "learning_rate": 4.99863441044694e-05, + "loss": 0.7886, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 4.9986111689391174e-05, + "loss": 0.9748, + "step": 595 + }, + { + "epoch": 0.03, + "learning_rate": 4.998587731373383e-05, + "loss": 0.9811, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 4.9985640977515744e-05, + "loss": 0.8928, + "step": 605 + }, + { + "epoch": 0.03, + "learning_rate": 4.998540268075548e-05, + "loss": 0.9, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 4.998516242347172e-05, + "loss": 0.8575, + "step": 615 + }, + { + "epoch": 0.03, + "learning_rate": 4.998492020568332e-05, + "loss": 0.8665, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 4.998467602740929e-05, + "loss": 0.9402, + "step": 625 + }, + { + "epoch": 0.03, + "learning_rate": 4.998442988866879e-05, + "loss": 0.9656, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 4.9984181789481134e-05, + "loss": 0.9597, + "step": 635 + }, + { + "epoch": 0.03, + "learning_rate": 4.998393172986577e-05, + "loss": 0.8586, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 4.998367970984236e-05, + "loss": 0.8484, + "step": 645 + }, + { + "epoch": 0.03, + "learning_rate": 4.998342572943064e-05, + "loss": 0.955, + "step": 650 + }, + { + "epoch": 0.04, + "learning_rate": 4.998316978865055e-05, + "loss": 0.9496, + "step": 655 + }, + { + "epoch": 0.04, + "learning_rate": 4.998291188752219e-05, + "loss": 0.8029, + "step": 660 + }, + { + "epoch": 0.04, + "learning_rate": 4.998265202606578e-05, + "loss": 1.0053, + "step": 665 + }, + { + "epoch": 0.04, + "learning_rate": 4.9982390204301715e-05, + "loss": 1.0095, + "step": 670 + }, + { + "epoch": 0.04, + "learning_rate": 4.9982126422250534e-05, + "loss": 0.9966, + "step": 675 + }, + { + "epoch": 0.04, + "learning_rate": 4.9981860679932944e-05, + "loss": 0.8871, + "step": 680 + }, + { + "epoch": 0.04, + "learning_rate": 4.99815929773698e-05, + "loss": 0.7858, + "step": 685 + }, + { + "epoch": 0.04, + "learning_rate": 4.9981323314582085e-05, + "loss": 0.9204, + "step": 690 + }, + { + "epoch": 0.04, + "learning_rate": 4.9981051691590984e-05, + "loss": 1.0274, + "step": 695 + }, + { + "epoch": 0.04, + "learning_rate": 4.99807781084178e-05, + "loss": 1.0154, + "step": 700 + }, + { + "epoch": 0.04, + "learning_rate": 4.9980502565083996e-05, + "loss": 0.7178, + "step": 705 + }, + { + "epoch": 0.04, + "learning_rate": 4.99802250616112e-05, + "loss": 0.9133, + "step": 710 + }, + { + "epoch": 0.04, + "learning_rate": 4.997994559802118e-05, + "loss": 1.034, + "step": 715 + }, + { + "epoch": 0.04, + "learning_rate": 4.9979664174335874e-05, + "loss": 1.0744, + "step": 720 + }, + { + "epoch": 0.04, + "learning_rate": 4.9979380790577364e-05, + "loss": 0.9425, + "step": 725 + }, + { + "epoch": 0.04, + "learning_rate": 4.997909544676788e-05, + "loss": 0.9228, + "step": 730 + }, + { + "epoch": 0.04, + "learning_rate": 4.997880814292981e-05, + "loss": 0.7898, + "step": 735 + }, + { + "epoch": 0.04, + "learning_rate": 4.99785188790857e-05, + "loss": 0.9179, + "step": 740 + }, + { + "epoch": 0.04, + "learning_rate": 4.9978227655258246e-05, + "loss": 0.9558, + "step": 745 + }, + { + "epoch": 0.04, + "learning_rate": 4.997793447147031e-05, + "loss": 0.8552, + "step": 750 + }, + { + "epoch": 0.04, + "learning_rate": 4.997763932774489e-05, + "loss": 0.8245, + "step": 755 + }, + { + "epoch": 0.04, + "learning_rate": 4.997734222410514e-05, + "loss": 0.9163, + "step": 760 + }, + { + "epoch": 0.04, + "learning_rate": 4.997704316057438e-05, + "loss": 1.0522, + "step": 765 + }, + { + "epoch": 0.04, + "learning_rate": 4.997674213717607e-05, + "loss": 0.7773, + "step": 770 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976439153933843e-05, + "loss": 0.8591, + "step": 775 + }, + { + "epoch": 0.04, + "learning_rate": 4.997613421087146e-05, + "loss": 0.9712, + "step": 780 + }, + { + "epoch": 0.04, + "learning_rate": 4.997582730801285e-05, + "loss": 0.7712, + "step": 785 + }, + { + "epoch": 0.04, + "learning_rate": 4.99755184453821e-05, + "loss": 0.797, + "step": 790 + }, + { + "epoch": 0.04, + "learning_rate": 4.997520762300344e-05, + "loss": 1.0037, + "step": 795 + }, + { + "epoch": 0.04, + "learning_rate": 4.997489484090127e-05, + "loss": 1.0809, + "step": 800 + }, + { + "epoch": 0.04, + "learning_rate": 4.9974580099100124e-05, + "loss": 0.9685, + "step": 805 + }, + { + "epoch": 0.04, + "learning_rate": 4.9974263397624695e-05, + "loss": 0.8071, + "step": 810 + }, + { + "epoch": 0.04, + "learning_rate": 4.9973944736499837e-05, + "loss": 0.9863, + "step": 815 + }, + { + "epoch": 0.04, + "learning_rate": 4.997362411575056e-05, + "loss": 0.8905, + "step": 820 + }, + { + "epoch": 0.04, + "learning_rate": 4.9973301535402025e-05, + "loss": 0.8756, + "step": 825 + }, + { + "epoch": 0.04, + "learning_rate": 4.997297699547953e-05, + "loss": 0.9958, + "step": 830 + }, + { + "epoch": 0.04, + "learning_rate": 4.997265049600854e-05, + "loss": 1.1043, + "step": 835 + }, + { + "epoch": 0.04, + "learning_rate": 4.99723220370147e-05, + "loss": 0.8283, + "step": 840 + }, + { + "epoch": 0.05, + "learning_rate": 4.997199161852375e-05, + "loss": 0.958, + "step": 845 + }, + { + "epoch": 0.05, + "learning_rate": 4.997165924056164e-05, + "loss": 1.1133, + "step": 850 + }, + { + "epoch": 0.05, + "learning_rate": 4.997132490315444e-05, + "loss": 0.9223, + "step": 855 + }, + { + "epoch": 0.05, + "learning_rate": 4.9970988606328397e-05, + "loss": 0.8501, + "step": 860 + }, + { + "epoch": 0.05, + "learning_rate": 4.997065035010987e-05, + "loss": 0.8628, + "step": 865 + }, + { + "epoch": 0.05, + "learning_rate": 4.997031013452543e-05, + "loss": 0.8997, + "step": 870 + }, + { + "epoch": 0.05, + "learning_rate": 4.996996795960176e-05, + "loss": 0.7625, + "step": 875 + }, + { + "epoch": 0.05, + "learning_rate": 4.996962382536572e-05, + "loss": 0.8228, + "step": 880 + }, + { + "epoch": 0.05, + "learning_rate": 4.99692777318443e-05, + "loss": 0.7735, + "step": 885 + }, + { + "epoch": 0.05, + "learning_rate": 4.9968929679064655e-05, + "loss": 0.8135, + "step": 890 + }, + { + "epoch": 0.05, + "learning_rate": 4.9968579667054117e-05, + "loss": 0.8264, + "step": 895 + }, + { + "epoch": 0.05, + "learning_rate": 4.996822769584013e-05, + "loss": 0.8968, + "step": 900 + }, + { + "epoch": 0.05, + "learning_rate": 4.996787376545031e-05, + "loss": 0.8806, + "step": 905 + }, + { + "epoch": 0.05, + "learning_rate": 4.9967517875912446e-05, + "loss": 0.799, + "step": 910 + }, + { + "epoch": 0.05, + "learning_rate": 4.9967160027254446e-05, + "loss": 1.0222, + "step": 915 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966800219504405e-05, + "loss": 0.906, + "step": 920 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966438452690545e-05, + "loss": 0.8508, + "step": 925 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966074726841254e-05, + "loss": 0.9311, + "step": 930 + }, + { + "epoch": 0.05, + "learning_rate": 4.996570904198508e-05, + "loss": 0.8446, + "step": 935 + }, + { + "epoch": 0.05, + "learning_rate": 4.99653413981507e-05, + "loss": 0.8019, + "step": 940 + }, + { + "epoch": 0.05, + "learning_rate": 4.996497179536699e-05, + "loss": 0.8191, + "step": 945 + }, + { + "epoch": 0.05, + "learning_rate": 4.9964600233662915e-05, + "loss": 1.0145, + "step": 950 + }, + { + "epoch": 0.05, + "learning_rate": 4.996422671306766e-05, + "loss": 0.8515, + "step": 955 + }, + { + "epoch": 0.05, + "learning_rate": 4.996385123361053e-05, + "loss": 0.8751, + "step": 960 + }, + { + "epoch": 0.05, + "learning_rate": 4.996347379532097e-05, + "loss": 0.8246, + "step": 965 + }, + { + "epoch": 0.05, + "learning_rate": 4.996309439822862e-05, + "loss": 0.8811, + "step": 970 + }, + { + "epoch": 0.05, + "learning_rate": 4.996271304236323e-05, + "loss": 0.8678, + "step": 975 + }, + { + "epoch": 0.05, + "learning_rate": 4.996232972775474e-05, + "loss": 0.7357, + "step": 980 + }, + { + "epoch": 0.05, + "learning_rate": 4.9961944454433204e-05, + "loss": 0.8088, + "step": 985 + }, + { + "epoch": 0.05, + "learning_rate": 4.996155722242888e-05, + "loss": 0.8257, + "step": 990 + }, + { + "epoch": 0.05, + "learning_rate": 4.996116803177214e-05, + "loss": 0.8482, + "step": 995 + }, + { + "epoch": 0.05, + "learning_rate": 4.996077688249352e-05, + "loss": 0.928, + "step": 1000 + }, + { + "epoch": 0.05, + "learning_rate": 4.996038377462372e-05, + "loss": 0.8771, + "step": 1005 + }, + { + "epoch": 0.05, + "learning_rate": 4.9959988708193585e-05, + "loss": 1.0969, + "step": 1010 + }, + { + "epoch": 0.05, + "learning_rate": 4.9959591683234106e-05, + "loss": 0.811, + "step": 1015 + }, + { + "epoch": 0.05, + "learning_rate": 4.9959192699776445e-05, + "loss": 0.8073, + "step": 1020 + }, + { + "epoch": 0.05, + "learning_rate": 4.99587917578519e-05, + "loss": 0.8761, + "step": 1025 + }, + { + "epoch": 0.06, + "learning_rate": 4.995838885749194e-05, + "loss": 0.8305, + "step": 1030 + }, + { + "epoch": 0.06, + "learning_rate": 4.995798399872818e-05, + "loss": 0.9013, + "step": 1035 + }, + { + "epoch": 0.06, + "learning_rate": 4.995757718159238e-05, + "loss": 0.8605, + "step": 1040 + }, + { + "epoch": 0.06, + "learning_rate": 4.995716840611647e-05, + "loss": 0.7716, + "step": 1045 + }, + { + "epoch": 0.06, + "learning_rate": 4.9956757672332515e-05, + "loss": 0.9188, + "step": 1050 + }, + { + "epoch": 0.06, + "learning_rate": 4.995634498027275e-05, + "loss": 0.9129, + "step": 1055 + }, + { + "epoch": 0.06, + "learning_rate": 4.995593032996957e-05, + "loss": 0.8646, + "step": 1060 + }, + { + "epoch": 0.06, + "learning_rate": 4.995551372145549e-05, + "loss": 0.847, + "step": 1065 + }, + { + "epoch": 0.06, + "learning_rate": 4.995509515476321e-05, + "loss": 0.9522, + "step": 1070 + }, + { + "epoch": 0.06, + "learning_rate": 4.9954674629925576e-05, + "loss": 0.9094, + "step": 1075 + }, + { + "epoch": 0.06, + "learning_rate": 4.995425214697558e-05, + "loss": 0.8598, + "step": 1080 + }, + { + "epoch": 0.06, + "learning_rate": 4.995382770594637e-05, + "loss": 0.7556, + "step": 1085 + }, + { + "epoch": 0.06, + "learning_rate": 4.995340130687126e-05, + "loss": 0.891, + "step": 1090 + }, + { + "epoch": 0.06, + "learning_rate": 4.9952972949783706e-05, + "loss": 0.7336, + "step": 1095 + }, + { + "epoch": 0.06, + "learning_rate": 4.995254263471732e-05, + "loss": 0.8502, + "step": 1100 + }, + { + "epoch": 0.06, + "learning_rate": 4.995211036170585e-05, + "loss": 0.8469, + "step": 1105 + }, + { + "epoch": 0.06, + "learning_rate": 4.995167613078324e-05, + "loss": 0.8072, + "step": 1110 + }, + { + "epoch": 0.06, + "learning_rate": 4.995123994198355e-05, + "loss": 0.7748, + "step": 1115 + }, + { + "epoch": 0.06, + "learning_rate": 4.9950801795341006e-05, + "loss": 0.9986, + "step": 1120 + }, + { + "epoch": 0.06, + "learning_rate": 4.995036169089e-05, + "loss": 0.9872, + "step": 1125 + }, + { + "epoch": 0.06, + "learning_rate": 4.994991962866505e-05, + "loss": 0.9266, + "step": 1130 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949475608700846e-05, + "loss": 0.9211, + "step": 1135 + }, + { + "epoch": 0.06, + "learning_rate": 4.994902963103224e-05, + "loss": 0.8535, + "step": 1140 + }, + { + "epoch": 0.06, + "learning_rate": 4.994858169569422e-05, + "loss": 0.692, + "step": 1145 + }, + { + "epoch": 0.06, + "learning_rate": 4.994813180272192e-05, + "loss": 0.7803, + "step": 1150 + }, + { + "epoch": 0.06, + "learning_rate": 4.994767995215067e-05, + "loss": 0.7417, + "step": 1155 + }, + { + "epoch": 0.06, + "learning_rate": 4.99472261440159e-05, + "loss": 0.7841, + "step": 1160 + }, + { + "epoch": 0.06, + "learning_rate": 4.9946770378353225e-05, + "loss": 0.8977, + "step": 1165 + }, + { + "epoch": 0.06, + "learning_rate": 4.994631265519842e-05, + "loss": 0.7649, + "step": 1170 + }, + { + "epoch": 0.06, + "learning_rate": 4.994585297458739e-05, + "loss": 0.9532, + "step": 1175 + }, + { + "epoch": 0.06, + "learning_rate": 4.99453913365562e-05, + "loss": 0.9149, + "step": 1180 + }, + { + "epoch": 0.06, + "learning_rate": 4.994492774114109e-05, + "loss": 0.7847, + "step": 1185 + }, + { + "epoch": 0.06, + "learning_rate": 4.994446218837842e-05, + "loss": 0.8236, + "step": 1190 + }, + { + "epoch": 0.06, + "learning_rate": 4.994399467830473e-05, + "loss": 0.8189, + "step": 1195 + }, + { + "epoch": 0.06, + "learning_rate": 4.99435252109567e-05, + "loss": 0.9335, + "step": 1200 + }, + { + "epoch": 0.06, + "learning_rate": 4.994305378637116e-05, + "loss": 0.9427, + "step": 1205 + }, + { + "epoch": 0.06, + "learning_rate": 4.994258040458513e-05, + "loss": 0.8016, + "step": 1210 + }, + { + "epoch": 0.06, + "learning_rate": 4.9942105065635713e-05, + "loss": 1.0169, + "step": 1215 + }, + { + "epoch": 0.07, + "learning_rate": 4.994162776956024e-05, + "loss": 0.8399, + "step": 1220 + }, + { + "epoch": 0.07, + "learning_rate": 4.994114851639615e-05, + "loss": 0.9022, + "step": 1225 + }, + { + "epoch": 0.07, + "learning_rate": 4.9940667306181056e-05, + "loss": 0.787, + "step": 1230 + }, + { + "epoch": 0.07, + "learning_rate": 4.994018413895271e-05, + "loss": 0.9417, + "step": 1235 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939699014749015e-05, + "loss": 0.834, + "step": 1240 + }, + { + "epoch": 0.07, + "learning_rate": 4.993921193360806e-05, + "loss": 0.9071, + "step": 1245 + }, + { + "epoch": 0.07, + "learning_rate": 4.993872289556804e-05, + "loss": 0.9327, + "step": 1250 + }, + { + "epoch": 0.07, + "learning_rate": 4.9938231900667345e-05, + "loss": 0.9787, + "step": 1255 + }, + { + "epoch": 0.07, + "learning_rate": 4.99377389489445e-05, + "loss": 0.8714, + "step": 1260 + }, + { + "epoch": 0.07, + "learning_rate": 4.993724404043819e-05, + "loss": 0.995, + "step": 1265 + }, + { + "epoch": 0.07, + "learning_rate": 4.993674717518723e-05, + "loss": 0.7538, + "step": 1270 + }, + { + "epoch": 0.07, + "learning_rate": 4.9936248353230624e-05, + "loss": 0.9466, + "step": 1275 + }, + { + "epoch": 0.07, + "learning_rate": 4.993574757460751e-05, + "loss": 1.0344, + "step": 1280 + }, + { + "epoch": 0.07, + "learning_rate": 4.993524483935718e-05, + "loss": 0.9573, + "step": 1285 + }, + { + "epoch": 0.07, + "learning_rate": 4.993474014751908e-05, + "loss": 0.9206, + "step": 1290 + }, + { + "epoch": 0.07, + "learning_rate": 4.9934233499132817e-05, + "loss": 0.9753, + "step": 1295 + }, + { + "epoch": 0.07, + "learning_rate": 4.9933724894238146e-05, + "loss": 0.7576, + "step": 1300 + }, + { + "epoch": 0.07, + "learning_rate": 4.993321433287496e-05, + "loss": 0.9328, + "step": 1305 + }, + { + "epoch": 0.07, + "learning_rate": 4.9932701815083346e-05, + "loss": 0.8051, + "step": 1310 + }, + { + "epoch": 0.07, + "learning_rate": 4.99321873409035e-05, + "loss": 0.8396, + "step": 1315 + }, + { + "epoch": 0.07, + "learning_rate": 4.9931670910375805e-05, + "loss": 0.8743, + "step": 1320 + }, + { + "epoch": 0.07, + "learning_rate": 4.993115252354077e-05, + "loss": 0.8973, + "step": 1325 + }, + { + "epoch": 0.07, + "learning_rate": 4.9930632180439085e-05, + "loss": 0.8204, + "step": 1330 + }, + { + "epoch": 0.07, + "learning_rate": 4.993010988111157e-05, + "loss": 0.8987, + "step": 1335 + }, + { + "epoch": 0.07, + "learning_rate": 4.992958562559921e-05, + "loss": 0.9644, + "step": 1340 + }, + { + "epoch": 0.07, + "learning_rate": 4.992905941394315e-05, + "loss": 0.8282, + "step": 1345 + }, + { + "epoch": 0.07, + "learning_rate": 4.9928531246184664e-05, + "loss": 0.7461, + "step": 1350 + }, + { + "epoch": 0.07, + "learning_rate": 4.992800112236521e-05, + "loss": 0.9401, + "step": 1355 + }, + { + "epoch": 0.07, + "learning_rate": 4.9927469042526374e-05, + "loss": 0.7926, + "step": 1360 + }, + { + "epoch": 0.07, + "learning_rate": 4.992693500670992e-05, + "loss": 0.9393, + "step": 1365 + }, + { + "epoch": 0.07, + "learning_rate": 4.9926399014957735e-05, + "loss": 0.7676, + "step": 1370 + }, + { + "epoch": 0.07, + "learning_rate": 4.992586106731189e-05, + "loss": 0.882, + "step": 1375 + }, + { + "epoch": 0.07, + "learning_rate": 4.992532116381459e-05, + "loss": 0.8941, + "step": 1380 + }, + { + "epoch": 0.07, + "learning_rate": 4.9924779304508205e-05, + "loss": 0.9527, + "step": 1385 + }, + { + "epoch": 0.07, + "learning_rate": 4.992423548943524e-05, + "loss": 0.9959, + "step": 1390 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923689718638386e-05, + "loss": 0.9233, + "step": 1395 + }, + { + "epoch": 0.07, + "learning_rate": 4.992314199216046e-05, + "loss": 0.8311, + "step": 1400 + }, + { + "epoch": 0.08, + "learning_rate": 4.992259231004443e-05, + "loss": 0.849, + "step": 1405 + }, + { + "epoch": 0.08, + "learning_rate": 4.9922040672333435e-05, + "loss": 0.8877, + "step": 1410 + }, + { + "epoch": 0.08, + "learning_rate": 4.992148707907077e-05, + "loss": 0.8116, + "step": 1415 + }, + { + "epoch": 0.08, + "learning_rate": 4.9920931530299854e-05, + "loss": 0.8211, + "step": 1420 + }, + { + "epoch": 0.08, + "learning_rate": 4.9920374026064294e-05, + "loss": 0.7091, + "step": 1425 + }, + { + "epoch": 0.08, + "learning_rate": 4.991981456640783e-05, + "loss": 0.8518, + "step": 1430 + }, + { + "epoch": 0.08, + "learning_rate": 4.9919253151374355e-05, + "loss": 0.7311, + "step": 1435 + }, + { + "epoch": 0.08, + "learning_rate": 4.9918689781007946e-05, + "loss": 0.9369, + "step": 1440 + }, + { + "epoch": 0.08, + "learning_rate": 4.9918124455352774e-05, + "loss": 0.8322, + "step": 1445 + }, + { + "epoch": 0.08, + "learning_rate": 4.991755717445322e-05, + "loss": 0.8642, + "step": 1450 + }, + { + "epoch": 0.08, + "learning_rate": 4.99169879383538e-05, + "loss": 0.8866, + "step": 1455 + }, + { + "epoch": 0.08, + "learning_rate": 4.9916416747099166e-05, + "loss": 0.9548, + "step": 1460 + }, + { + "epoch": 0.08, + "learning_rate": 4.991584360073415e-05, + "loss": 0.967, + "step": 1465 + }, + { + "epoch": 0.08, + "learning_rate": 4.991526849930371e-05, + "loss": 0.8791, + "step": 1470 + }, + { + "epoch": 0.08, + "learning_rate": 4.9914691442852976e-05, + "loss": 1.0645, + "step": 1475 + }, + { + "epoch": 0.08, + "learning_rate": 4.991411243142725e-05, + "loss": 0.9111, + "step": 1480 + }, + { + "epoch": 0.08, + "learning_rate": 4.991353146507194e-05, + "loss": 0.8932, + "step": 1485 + }, + { + "epoch": 0.08, + "learning_rate": 4.991294854383264e-05, + "loss": 0.8218, + "step": 1490 + }, + { + "epoch": 0.08, + "learning_rate": 4.991236366775509e-05, + "loss": 0.887, + "step": 1495 + }, + { + "epoch": 0.08, + "learning_rate": 4.991177683688518e-05, + "loss": 0.8274, + "step": 1500 + }, + { + "epoch": 0.08, + "learning_rate": 4.991118805126897e-05, + "loss": 0.9471, + "step": 1505 + }, + { + "epoch": 0.08, + "learning_rate": 4.991059731095265e-05, + "loss": 0.7971, + "step": 1510 + }, + { + "epoch": 0.08, + "learning_rate": 4.9910004615982565e-05, + "loss": 0.9093, + "step": 1515 + }, + { + "epoch": 0.08, + "learning_rate": 4.990940996640524e-05, + "loss": 0.8429, + "step": 1520 + }, + { + "epoch": 0.08, + "learning_rate": 4.990881336226732e-05, + "loss": 0.8453, + "step": 1525 + }, + { + "epoch": 0.08, + "learning_rate": 4.990821480361563e-05, + "loss": 0.8797, + "step": 1530 + }, + { + "epoch": 0.08, + "learning_rate": 4.990761429049713e-05, + "loss": 0.9274, + "step": 1535 + }, + { + "epoch": 0.08, + "learning_rate": 4.990701182295894e-05, + "loss": 0.8274, + "step": 1540 + }, + { + "epoch": 0.08, + "learning_rate": 4.990640740104834e-05, + "loss": 0.8599, + "step": 1545 + }, + { + "epoch": 0.08, + "learning_rate": 4.990580102481275e-05, + "loss": 0.7916, + "step": 1550 + }, + { + "epoch": 0.08, + "learning_rate": 4.990519269429975e-05, + "loss": 0.777, + "step": 1555 + }, + { + "epoch": 0.08, + "learning_rate": 4.990458240955708e-05, + "loss": 0.8698, + "step": 1560 + }, + { + "epoch": 0.08, + "learning_rate": 4.990397017063262e-05, + "loss": 0.7045, + "step": 1565 + }, + { + "epoch": 0.08, + "learning_rate": 4.990335597757442e-05, + "loss": 0.897, + "step": 1570 + }, + { + "epoch": 0.08, + "learning_rate": 4.990273983043067e-05, + "loss": 0.9374, + "step": 1575 + }, + { + "epoch": 0.08, + "learning_rate": 4.9902121729249704e-05, + "loss": 0.9631, + "step": 1580 + }, + { + "epoch": 0.08, + "learning_rate": 4.990150167408004e-05, + "loss": 0.9525, + "step": 1585 + }, + { + "epoch": 0.09, + "learning_rate": 4.9900879664970335e-05, + "loss": 0.7914, + "step": 1590 + }, + { + "epoch": 0.09, + "learning_rate": 4.990025570196937e-05, + "loss": 0.8088, + "step": 1595 + }, + { + "epoch": 0.09, + "learning_rate": 4.989962978512613e-05, + "loss": 0.9072, + "step": 1600 + }, + { + "epoch": 0.09, + "learning_rate": 4.989900191448972e-05, + "loss": 0.8763, + "step": 1605 + }, + { + "epoch": 0.09, + "learning_rate": 4.989837209010941e-05, + "loss": 0.8714, + "step": 1610 + }, + { + "epoch": 0.09, + "learning_rate": 4.9897740312034616e-05, + "loss": 0.8351, + "step": 1615 + }, + { + "epoch": 0.09, + "learning_rate": 4.9897106580314913e-05, + "loss": 0.9255, + "step": 1620 + }, + { + "epoch": 0.09, + "learning_rate": 4.989647089500003e-05, + "loss": 0.8401, + "step": 1625 + }, + { + "epoch": 0.09, + "learning_rate": 4.989583325613984e-05, + "loss": 0.7644, + "step": 1630 + }, + { + "epoch": 0.09, + "learning_rate": 4.989519366378438e-05, + "loss": 0.8625, + "step": 1635 + }, + { + "epoch": 0.09, + "learning_rate": 4.989455211798384e-05, + "loss": 1.0019, + "step": 1640 + }, + { + "epoch": 0.09, + "learning_rate": 4.989390861878856e-05, + "loss": 0.8116, + "step": 1645 + }, + { + "epoch": 0.09, + "learning_rate": 4.989326316624903e-05, + "loss": 0.9912, + "step": 1650 + }, + { + "epoch": 0.09, + "learning_rate": 4.9892615760415905e-05, + "loss": 0.7684, + "step": 1655 + }, + { + "epoch": 0.09, + "learning_rate": 4.9891966401339975e-05, + "loss": 0.8937, + "step": 1660 + }, + { + "epoch": 0.09, + "learning_rate": 4.989131508907219e-05, + "loss": 0.946, + "step": 1665 + }, + { + "epoch": 0.09, + "learning_rate": 4.989066182366367e-05, + "loss": 0.8702, + "step": 1670 + }, + { + "epoch": 0.09, + "learning_rate": 4.9890006605165665e-05, + "loss": 0.8557, + "step": 1675 + }, + { + "epoch": 0.09, + "learning_rate": 4.9889349433629584e-05, + "loss": 0.9027, + "step": 1680 + }, + { + "epoch": 0.09, + "learning_rate": 4.988869030910701e-05, + "loss": 0.7927, + "step": 1685 + }, + { + "epoch": 0.09, + "learning_rate": 4.988802923164965e-05, + "loss": 0.839, + "step": 1690 + }, + { + "epoch": 0.09, + "learning_rate": 4.9887366201309374e-05, + "loss": 0.9143, + "step": 1695 + }, + { + "epoch": 0.09, + "learning_rate": 4.9886701218138205e-05, + "loss": 0.934, + "step": 1700 + }, + { + "epoch": 0.09, + "learning_rate": 4.988603428218834e-05, + "loss": 0.8441, + "step": 1705 + }, + { + "epoch": 0.09, + "learning_rate": 4.988536539351209e-05, + "loss": 0.926, + "step": 1710 + }, + { + "epoch": 0.09, + "learning_rate": 4.988469455216196e-05, + "loss": 0.7813, + "step": 1715 + }, + { + "epoch": 0.09, + "learning_rate": 4.988402175819058e-05, + "loss": 0.8033, + "step": 1720 + }, + { + "epoch": 0.09, + "learning_rate": 4.988334701165074e-05, + "loss": 0.7388, + "step": 1725 + }, + { + "epoch": 0.09, + "learning_rate": 4.988267031259538e-05, + "loss": 0.9363, + "step": 1730 + }, + { + "epoch": 0.09, + "learning_rate": 4.988199166107761e-05, + "loss": 0.9499, + "step": 1735 + }, + { + "epoch": 0.09, + "learning_rate": 4.988131105715068e-05, + "loss": 0.9348, + "step": 1740 + }, + { + "epoch": 0.09, + "learning_rate": 4.9880628500867985e-05, + "loss": 0.8518, + "step": 1745 + }, + { + "epoch": 0.09, + "learning_rate": 4.987994399228309e-05, + "loss": 1.003, + "step": 1750 + }, + { + "epoch": 0.09, + "learning_rate": 4.987925753144971e-05, + "loss": 0.9232, + "step": 1755 + }, + { + "epoch": 0.09, + "learning_rate": 4.987856911842171e-05, + "loss": 0.9477, + "step": 1760 + }, + { + "epoch": 0.09, + "learning_rate": 4.987787875325309e-05, + "loss": 0.7504, + "step": 1765 + }, + { + "epoch": 0.09, + "learning_rate": 4.9877186435998045e-05, + "loss": 0.9137, + "step": 1770 + }, + { + "epoch": 0.09, + "learning_rate": 4.9876492166710884e-05, + "loss": 0.8655, + "step": 1775 + }, + { + "epoch": 0.1, + "learning_rate": 4.987579594544608e-05, + "loss": 0.6944, + "step": 1780 + }, + { + "epoch": 0.1, + "learning_rate": 4.987509777225827e-05, + "loss": 0.8168, + "step": 1785 + }, + { + "epoch": 0.1, + "learning_rate": 4.987439764720225e-05, + "loss": 0.9912, + "step": 1790 + }, + { + "epoch": 0.1, + "learning_rate": 4.9873695570332934e-05, + "loss": 0.9112, + "step": 1795 + }, + { + "epoch": 0.1, + "learning_rate": 4.9872991541705424e-05, + "loss": 0.8157, + "step": 1800 + }, + { + "epoch": 0.1, + "learning_rate": 4.987228556137497e-05, + "loss": 0.869, + "step": 1805 + }, + { + "epoch": 0.1, + "learning_rate": 4.987157762939695e-05, + "loss": 0.8406, + "step": 1810 + }, + { + "epoch": 0.1, + "learning_rate": 4.9870867745826923e-05, + "loss": 0.8385, + "step": 1815 + }, + { + "epoch": 0.1, + "learning_rate": 4.987015591072058e-05, + "loss": 0.9317, + "step": 1820 + }, + { + "epoch": 0.1, + "learning_rate": 4.9869442124133805e-05, + "loss": 0.8695, + "step": 1825 + }, + { + "epoch": 0.1, + "learning_rate": 4.9868726386122575e-05, + "loss": 0.9023, + "step": 1830 + }, + { + "epoch": 0.1, + "learning_rate": 4.986800869674307e-05, + "loss": 0.6973, + "step": 1835 + }, + { + "epoch": 0.1, + "learning_rate": 4.9867289056051605e-05, + "loss": 0.9736, + "step": 1840 + }, + { + "epoch": 0.1, + "learning_rate": 4.986656746410464e-05, + "loss": 0.806, + "step": 1845 + }, + { + "epoch": 0.1, + "learning_rate": 4.986584392095879e-05, + "loss": 0.6967, + "step": 1850 + }, + { + "epoch": 0.1, + "learning_rate": 4.9865118426670845e-05, + "loss": 0.8629, + "step": 1855 + }, + { + "epoch": 0.1, + "learning_rate": 4.986439098129772e-05, + "loss": 0.8126, + "step": 1860 + }, + { + "epoch": 0.1, + "learning_rate": 4.9863661584896506e-05, + "loss": 0.8886, + "step": 1865 + }, + { + "epoch": 0.1, + "learning_rate": 4.9862930237524425e-05, + "loss": 1.0021, + "step": 1870 + }, + { + "epoch": 0.1, + "learning_rate": 4.986219693923887e-05, + "loss": 0.7813, + "step": 1875 + }, + { + "epoch": 0.1, + "learning_rate": 4.9861461690097377e-05, + "loss": 0.719, + "step": 1880 + }, + { + "epoch": 0.1, + "learning_rate": 4.9860724490157645e-05, + "loss": 0.7863, + "step": 1885 + }, + { + "epoch": 0.1, + "learning_rate": 4.9859985339477516e-05, + "loss": 0.8206, + "step": 1890 + }, + { + "epoch": 0.1, + "learning_rate": 4.985924423811499e-05, + "loss": 0.8631, + "step": 1895 + }, + { + "epoch": 0.1, + "learning_rate": 4.985850118612821e-05, + "loss": 1.0037, + "step": 1900 + }, + { + "epoch": 0.1, + "learning_rate": 4.9857756183575494e-05, + "loss": 0.9073, + "step": 1905 + }, + { + "epoch": 0.1, + "learning_rate": 4.985700923051529e-05, + "loss": 0.8824, + "step": 1910 + }, + { + "epoch": 0.1, + "learning_rate": 4.9856260327006213e-05, + "loss": 0.8418, + "step": 1915 + }, + { + "epoch": 0.1, + "learning_rate": 4.985550947310703e-05, + "loss": 0.8256, + "step": 1920 + }, + { + "epoch": 0.1, + "learning_rate": 4.985475666887666e-05, + "loss": 0.8181, + "step": 1925 + }, + { + "epoch": 0.1, + "learning_rate": 4.985400191437416e-05, + "loss": 0.8676, + "step": 1930 + }, + { + "epoch": 0.1, + "learning_rate": 4.985324520965876e-05, + "loss": 0.9315, + "step": 1935 + }, + { + "epoch": 0.1, + "learning_rate": 4.985248655478984e-05, + "loss": 0.9193, + "step": 1940 + }, + { + "epoch": 0.1, + "learning_rate": 4.985172594982693e-05, + "loss": 0.6758, + "step": 1945 + }, + { + "epoch": 0.1, + "learning_rate": 4.985096339482971e-05, + "loss": 1.0395, + "step": 1950 + }, + { + "epoch": 0.1, + "learning_rate": 4.9850198889858005e-05, + "loss": 0.9421, + "step": 1955 + }, + { + "epoch": 0.1, + "learning_rate": 4.984943243497182e-05, + "loss": 0.8176, + "step": 1960 + }, + { + "epoch": 0.11, + "learning_rate": 4.984866403023129e-05, + "loss": 0.8059, + "step": 1965 + }, + { + "epoch": 0.11, + "learning_rate": 4.9847893675696697e-05, + "loss": 0.7081, + "step": 1970 + }, + { + "epoch": 0.11, + "learning_rate": 4.9847121371428504e-05, + "loss": 1.0082, + "step": 1975 + }, + { + "epoch": 0.11, + "learning_rate": 4.984634711748731e-05, + "loss": 0.8226, + "step": 1980 + }, + { + "epoch": 0.11, + "learning_rate": 4.984557091393387e-05, + "loss": 0.8726, + "step": 1985 + }, + { + "epoch": 0.11, + "learning_rate": 4.9844792760829075e-05, + "loss": 0.8124, + "step": 1990 + }, + { + "epoch": 0.11, + "learning_rate": 4.9844012658234e-05, + "loss": 0.9257, + "step": 1995 + }, + { + "epoch": 0.11, + "learning_rate": 4.9843230606209845e-05, + "loss": 0.8869, + "step": 2000 + }, + { + "epoch": 0.11, + "learning_rate": 4.984244660481798e-05, + "loss": 0.8605, + "step": 2005 + }, + { + "epoch": 0.11, + "learning_rate": 4.9841660654119924e-05, + "loss": 0.8098, + "step": 2010 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840872754177356e-05, + "loss": 0.9607, + "step": 2015 + }, + { + "epoch": 0.11, + "learning_rate": 4.984008290505209e-05, + "loss": 0.9345, + "step": 2020 + }, + { + "epoch": 0.11, + "learning_rate": 4.9839291106806095e-05, + "loss": 0.7879, + "step": 2025 + }, + { + "epoch": 0.11, + "learning_rate": 4.9838497359501524e-05, + "loss": 0.7783, + "step": 2030 + }, + { + "epoch": 0.11, + "learning_rate": 4.9837701663200636e-05, + "loss": 0.7527, + "step": 2035 + }, + { + "epoch": 0.11, + "learning_rate": 4.983690401796588e-05, + "loss": 0.8556, + "step": 2040 + }, + { + "epoch": 0.11, + "learning_rate": 4.983610442385984e-05, + "loss": 0.8758, + "step": 2045 + }, + { + "epoch": 0.11, + "learning_rate": 4.9835302880945266e-05, + "loss": 0.9996, + "step": 2050 + }, + { + "epoch": 0.11, + "learning_rate": 4.9834499389285036e-05, + "loss": 0.9905, + "step": 2055 + }, + { + "epoch": 0.11, + "learning_rate": 4.9833693948942217e-05, + "loss": 0.8837, + "step": 2060 + }, + { + "epoch": 0.11, + "learning_rate": 4.983288655998e-05, + "loss": 0.7579, + "step": 2065 + }, + { + "epoch": 0.11, + "learning_rate": 4.983207722246173e-05, + "loss": 0.9043, + "step": 2070 + }, + { + "epoch": 0.11, + "learning_rate": 4.983126593645092e-05, + "loss": 0.9022, + "step": 2075 + }, + { + "epoch": 0.11, + "learning_rate": 4.9830452702011236e-05, + "loss": 1.0319, + "step": 2080 + }, + { + "epoch": 0.11, + "learning_rate": 4.9829637519206486e-05, + "loss": 0.8624, + "step": 2085 + }, + { + "epoch": 0.11, + "learning_rate": 4.9828820388100625e-05, + "loss": 0.8045, + "step": 2090 + }, + { + "epoch": 0.11, + "learning_rate": 4.982800130875778e-05, + "loss": 0.9147, + "step": 2095 + }, + { + "epoch": 0.11, + "learning_rate": 4.982718028124223e-05, + "loss": 0.9279, + "step": 2100 + }, + { + "epoch": 0.11, + "learning_rate": 4.982635730561837e-05, + "loss": 0.8517, + "step": 2105 + }, + { + "epoch": 0.11, + "learning_rate": 4.98255323819508e-05, + "loss": 0.6737, + "step": 2110 + }, + { + "epoch": 0.11, + "learning_rate": 4.9824705510304247e-05, + "loss": 0.7965, + "step": 2115 + }, + { + "epoch": 0.11, + "learning_rate": 4.982387669074359e-05, + "loss": 0.7378, + "step": 2120 + }, + { + "epoch": 0.11, + "learning_rate": 4.9823045923333855e-05, + "loss": 0.8486, + "step": 2125 + }, + { + "epoch": 0.11, + "learning_rate": 4.982221320814024e-05, + "loss": 0.8096, + "step": 2130 + }, + { + "epoch": 0.11, + "learning_rate": 4.982137854522809e-05, + "loss": 0.7806, + "step": 2135 + }, + { + "epoch": 0.11, + "learning_rate": 4.982054193466289e-05, + "loss": 0.8132, + "step": 2140 + }, + { + "epoch": 0.11, + "learning_rate": 4.981970337651029e-05, + "loss": 0.7834, + "step": 2145 + }, + { + "epoch": 0.12, + "learning_rate": 4.981886287083607e-05, + "loss": 0.8362, + "step": 2150 + }, + { + "epoch": 0.12, + "learning_rate": 4.981802041770621e-05, + "loss": 0.9176, + "step": 2155 + }, + { + "epoch": 0.12, + "learning_rate": 4.981717601718681e-05, + "loss": 0.8825, + "step": 2160 + }, + { + "epoch": 0.12, + "learning_rate": 4.981632966934411e-05, + "loss": 0.9248, + "step": 2165 + }, + { + "epoch": 0.12, + "learning_rate": 4.981548137424453e-05, + "loss": 0.7836, + "step": 2170 + }, + { + "epoch": 0.12, + "learning_rate": 4.9814631131954635e-05, + "loss": 0.8113, + "step": 2175 + }, + { + "epoch": 0.12, + "learning_rate": 4.981377894254114e-05, + "loss": 0.8362, + "step": 2180 + }, + { + "epoch": 0.12, + "learning_rate": 4.981292480607091e-05, + "loss": 0.8749, + "step": 2185 + }, + { + "epoch": 0.12, + "learning_rate": 4.981206872261097e-05, + "loss": 0.9427, + "step": 2190 + }, + { + "epoch": 0.12, + "learning_rate": 4.98112106922285e-05, + "loss": 0.8892, + "step": 2195 + }, + { + "epoch": 0.12, + "learning_rate": 4.981035071499081e-05, + "loss": 0.8796, + "step": 2200 + }, + { + "epoch": 0.12, + "learning_rate": 4.980948879096539e-05, + "loss": 0.9027, + "step": 2205 + }, + { + "epoch": 0.12, + "learning_rate": 4.980862492021987e-05, + "loss": 0.8985, + "step": 2210 + }, + { + "epoch": 0.12, + "learning_rate": 4.9807759102822036e-05, + "loss": 0.8132, + "step": 2215 + }, + { + "epoch": 0.12, + "learning_rate": 4.980689133883983e-05, + "loss": 0.7887, + "step": 2220 + }, + { + "epoch": 0.12, + "learning_rate": 4.980602162834135e-05, + "loss": 0.8031, + "step": 2225 + }, + { + "epoch": 0.12, + "learning_rate": 4.9805149971394815e-05, + "loss": 0.6112, + "step": 2230 + }, + { + "epoch": 0.12, + "learning_rate": 4.9804276368068634e-05, + "loss": 0.7819, + "step": 2235 + }, + { + "epoch": 0.12, + "learning_rate": 4.980340081843137e-05, + "loss": 0.8431, + "step": 2240 + }, + { + "epoch": 0.12, + "learning_rate": 4.98025233225517e-05, + "loss": 0.8264, + "step": 2245 + }, + { + "epoch": 0.12, + "learning_rate": 4.980164388049849e-05, + "loss": 0.8417, + "step": 2250 + }, + { + "epoch": 0.12, + "learning_rate": 4.980076249234075e-05, + "loss": 0.8623, + "step": 2255 + }, + { + "epoch": 0.12, + "learning_rate": 4.979987915814763e-05, + "loss": 0.7658, + "step": 2260 + }, + { + "epoch": 0.12, + "learning_rate": 4.979899387798846e-05, + "loss": 0.8775, + "step": 2265 + }, + { + "epoch": 0.12, + "learning_rate": 4.979810665193269e-05, + "loss": 0.8722, + "step": 2270 + }, + { + "epoch": 0.12, + "learning_rate": 4.979721748004993e-05, + "loss": 0.7641, + "step": 2275 + }, + { + "epoch": 0.12, + "learning_rate": 4.979632636240997e-05, + "loss": 0.7558, + "step": 2280 + }, + { + "epoch": 0.12, + "learning_rate": 4.979543329908273e-05, + "loss": 0.8515, + "step": 2285 + }, + { + "epoch": 0.12, + "learning_rate": 4.979453829013827e-05, + "loss": 0.8285, + "step": 2290 + }, + { + "epoch": 0.12, + "learning_rate": 4.979364133564684e-05, + "loss": 0.9257, + "step": 2295 + }, + { + "epoch": 0.12, + "learning_rate": 4.97927424356788e-05, + "loss": 0.7933, + "step": 2300 + }, + { + "epoch": 0.12, + "learning_rate": 4.97918415903047e-05, + "loss": 0.7068, + "step": 2305 + }, + { + "epoch": 0.12, + "learning_rate": 4.979093879959521e-05, + "loss": 0.7912, + "step": 2310 + }, + { + "epoch": 0.12, + "learning_rate": 4.9790034063621196e-05, + "loss": 0.7526, + "step": 2315 + }, + { + "epoch": 0.12, + "learning_rate": 4.978912738245362e-05, + "loss": 0.8986, + "step": 2320 + }, + { + "epoch": 0.12, + "learning_rate": 4.978821875616364e-05, + "loss": 0.8767, + "step": 2325 + }, + { + "epoch": 0.12, + "learning_rate": 4.9787308184822554e-05, + "loss": 0.8165, + "step": 2330 + }, + { + "epoch": 0.12, + "learning_rate": 4.9786395668501815e-05, + "loss": 0.7368, + "step": 2335 + }, + { + "epoch": 0.13, + "learning_rate": 4.978548120727302e-05, + "loss": 0.9021, + "step": 2340 + }, + { + "epoch": 0.13, + "learning_rate": 4.978456480120792e-05, + "loss": 0.8389, + "step": 2345 + }, + { + "epoch": 0.13, + "learning_rate": 4.978364645037843e-05, + "loss": 0.7349, + "step": 2350 + }, + { + "epoch": 0.13, + "learning_rate": 4.97827261548566e-05, + "loss": 0.7653, + "step": 2355 + }, + { + "epoch": 0.13, + "learning_rate": 4.9781803914714654e-05, + "loss": 0.7587, + "step": 2360 + }, + { + "epoch": 0.13, + "learning_rate": 4.978087973002495e-05, + "loss": 0.8199, + "step": 2365 + }, + { + "epoch": 0.13, + "learning_rate": 4.9779953600860005e-05, + "loss": 0.7978, + "step": 2370 + }, + { + "epoch": 0.13, + "learning_rate": 4.97790255272925e-05, + "loss": 1.0208, + "step": 2375 + }, + { + "epoch": 0.13, + "learning_rate": 4.977809550939525e-05, + "loss": 0.7798, + "step": 2380 + }, + { + "epoch": 0.13, + "learning_rate": 4.977716354724122e-05, + "loss": 0.8695, + "step": 2385 + }, + { + "epoch": 0.13, + "learning_rate": 4.977622964090356e-05, + "loss": 0.8421, + "step": 2390 + }, + { + "epoch": 0.13, + "learning_rate": 4.9775293790455536e-05, + "loss": 0.7212, + "step": 2395 + }, + { + "epoch": 0.13, + "learning_rate": 4.977435599597058e-05, + "loss": 0.8967, + "step": 2400 + }, + { + "epoch": 0.13, + "learning_rate": 4.9773416257522286e-05, + "loss": 0.895, + "step": 2405 + }, + { + "epoch": 0.13, + "learning_rate": 4.977247457518439e-05, + "loss": 0.9811, + "step": 2410 + }, + { + "epoch": 0.13, + "learning_rate": 4.9771530949030776e-05, + "loss": 0.8655, + "step": 2415 + }, + { + "epoch": 0.13, + "learning_rate": 4.97705853791355e-05, + "loss": 0.9284, + "step": 2420 + }, + { + "epoch": 0.13, + "learning_rate": 4.976963786557274e-05, + "loss": 0.675, + "step": 2425 + }, + { + "epoch": 0.13, + "learning_rate": 4.976868840841686e-05, + "loss": 0.8499, + "step": 2430 + }, + { + "epoch": 0.13, + "learning_rate": 4.976773700774236e-05, + "loss": 1.0544, + "step": 2435 + }, + { + "epoch": 0.13, + "learning_rate": 4.9766783663623886e-05, + "loss": 0.9486, + "step": 2440 + }, + { + "epoch": 0.13, + "learning_rate": 4.976582837613624e-05, + "loss": 0.663, + "step": 2445 + }, + { + "epoch": 0.13, + "learning_rate": 4.97648711453544e-05, + "loss": 0.7747, + "step": 2450 + }, + { + "epoch": 0.13, + "learning_rate": 4.9763911971353447e-05, + "loss": 0.7663, + "step": 2455 + }, + { + "epoch": 0.13, + "learning_rate": 4.9762950854208666e-05, + "loss": 0.6607, + "step": 2460 + }, + { + "epoch": 0.13, + "learning_rate": 4.976198779399548e-05, + "loss": 0.813, + "step": 2465 + }, + { + "epoch": 0.13, + "learning_rate": 4.976102279078944e-05, + "loss": 0.7706, + "step": 2470 + }, + { + "epoch": 0.13, + "learning_rate": 4.976005584466626e-05, + "loss": 0.8038, + "step": 2475 + }, + { + "epoch": 0.13, + "learning_rate": 4.975908695570183e-05, + "loss": 0.8027, + "step": 2480 + }, + { + "epoch": 0.13, + "learning_rate": 4.9758116123972173e-05, + "loss": 0.8031, + "step": 2485 + }, + { + "epoch": 0.13, + "learning_rate": 4.975714334955346e-05, + "loss": 0.9296, + "step": 2490 + }, + { + "epoch": 0.13, + "learning_rate": 4.975616863252203e-05, + "loss": 0.9807, + "step": 2495 + }, + { + "epoch": 0.13, + "learning_rate": 4.9755191972954366e-05, + "loss": 0.9434, + "step": 2500 + }, + { + "epoch": 0.13, + "learning_rate": 4.9754213370927086e-05, + "loss": 0.8829, + "step": 2505 + }, + { + "epoch": 0.13, + "learning_rate": 4.975323282651701e-05, + "loss": 1.0117, + "step": 2510 + }, + { + "epoch": 0.13, + "learning_rate": 4.975225033980104e-05, + "loss": 1.1015, + "step": 2515 + }, + { + "epoch": 0.13, + "learning_rate": 4.9751265910856294e-05, + "loss": 0.9377, + "step": 2520 + }, + { + "epoch": 0.14, + "learning_rate": 4.975027953976e-05, + "loss": 0.7232, + "step": 2525 + }, + { + "epoch": 0.14, + "learning_rate": 4.9749291226589584e-05, + "loss": 0.7432, + "step": 2530 + }, + { + "epoch": 0.14, + "learning_rate": 4.974830097142257e-05, + "loss": 0.9603, + "step": 2535 + }, + { + "epoch": 0.14, + "learning_rate": 4.974730877433666e-05, + "loss": 0.6104, + "step": 2540 + }, + { + "epoch": 0.14, + "learning_rate": 4.974631463540973e-05, + "loss": 0.7624, + "step": 2545 + }, + { + "epoch": 0.14, + "learning_rate": 4.974531855471976e-05, + "loss": 0.893, + "step": 2550 + }, + { + "epoch": 0.14, + "learning_rate": 4.974432053234493e-05, + "loss": 0.7997, + "step": 2555 + }, + { + "epoch": 0.14, + "learning_rate": 4.9743320568363536e-05, + "loss": 0.8702, + "step": 2560 + }, + { + "epoch": 0.14, + "learning_rate": 4.974231866285406e-05, + "loss": 0.9106, + "step": 2565 + }, + { + "epoch": 0.14, + "learning_rate": 4.9741314815895104e-05, + "loss": 0.7824, + "step": 2570 + }, + { + "epoch": 0.14, + "learning_rate": 4.9740309027565434e-05, + "loss": 0.792, + "step": 2575 + }, + { + "epoch": 0.14, + "learning_rate": 4.973930129794398e-05, + "loss": 0.8312, + "step": 2580 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738291627109814e-05, + "loss": 0.9209, + "step": 2585 + }, + { + "epoch": 0.14, + "learning_rate": 4.9737280015142165e-05, + "loss": 0.7518, + "step": 2590 + }, + { + "epoch": 0.14, + "learning_rate": 4.9736266462120394e-05, + "loss": 0.845, + "step": 2595 + }, + { + "epoch": 0.14, + "learning_rate": 4.9735250968124054e-05, + "loss": 0.9205, + "step": 2600 + }, + { + "epoch": 0.14, + "learning_rate": 4.973423353323281e-05, + "loss": 0.7602, + "step": 2605 + }, + { + "epoch": 0.14, + "learning_rate": 4.973321415752651e-05, + "loss": 0.7718, + "step": 2610 + }, + { + "epoch": 0.14, + "learning_rate": 4.973219284108513e-05, + "loss": 0.9397, + "step": 2615 + }, + { + "epoch": 0.14, + "learning_rate": 4.9731169583988815e-05, + "loss": 0.5993, + "step": 2620 + }, + { + "epoch": 0.14, + "learning_rate": 4.973014438631786e-05, + "loss": 0.9764, + "step": 2625 + }, + { + "epoch": 0.14, + "learning_rate": 4.97291172481527e-05, + "loss": 0.8947, + "step": 2630 + }, + { + "epoch": 0.14, + "learning_rate": 4.972808816957394e-05, + "loss": 0.7173, + "step": 2635 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727057150662315e-05, + "loss": 0.7808, + "step": 2640 + }, + { + "epoch": 0.14, + "learning_rate": 4.9726024191498744e-05, + "loss": 0.8548, + "step": 2645 + }, + { + "epoch": 0.14, + "learning_rate": 4.972498929216427e-05, + "loss": 0.771, + "step": 2650 + }, + { + "epoch": 0.14, + "learning_rate": 4.9723952452740095e-05, + "loss": 0.7493, + "step": 2655 + }, + { + "epoch": 0.14, + "learning_rate": 4.972291367330759e-05, + "loss": 0.8956, + "step": 2660 + }, + { + "epoch": 0.14, + "learning_rate": 4.9721872953948244e-05, + "loss": 1.0156, + "step": 2665 + }, + { + "epoch": 0.14, + "learning_rate": 4.972083029474374e-05, + "loss": 0.9684, + "step": 2670 + }, + { + "epoch": 0.14, + "learning_rate": 4.971978569577587e-05, + "loss": 0.7829, + "step": 2675 + }, + { + "epoch": 0.14, + "learning_rate": 4.9718739157126627e-05, + "loss": 0.7946, + "step": 2680 + }, + { + "epoch": 0.14, + "learning_rate": 4.97176906788781e-05, + "loss": 0.8013, + "step": 2685 + }, + { + "epoch": 0.14, + "learning_rate": 4.971664026111259e-05, + "loss": 0.7803, + "step": 2690 + }, + { + "epoch": 0.14, + "learning_rate": 4.97155879039125e-05, + "loss": 0.8442, + "step": 2695 + }, + { + "epoch": 0.14, + "learning_rate": 4.9714533607360404e-05, + "loss": 0.9028, + "step": 2700 + }, + { + "epoch": 0.14, + "learning_rate": 4.971347737153904e-05, + "loss": 0.945, + "step": 2705 + }, + { + "epoch": 0.14, + "learning_rate": 4.971241919653128e-05, + "loss": 0.8276, + "step": 2710 + }, + { + "epoch": 0.15, + "learning_rate": 4.9711359082420156e-05, + "loss": 0.841, + "step": 2715 + }, + { + "epoch": 0.15, + "learning_rate": 4.971029702928886e-05, + "loss": 0.9866, + "step": 2720 + }, + { + "epoch": 0.15, + "learning_rate": 4.9709233037220713e-05, + "loss": 0.7829, + "step": 2725 + }, + { + "epoch": 0.15, + "learning_rate": 4.970816710629922e-05, + "loss": 0.8162, + "step": 2730 + }, + { + "epoch": 0.15, + "learning_rate": 4.9707099236608e-05, + "loss": 0.8764, + "step": 2735 + }, + { + "epoch": 0.15, + "learning_rate": 4.970602942823087e-05, + "loss": 0.7875, + "step": 2740 + }, + { + "epoch": 0.15, + "learning_rate": 4.970495768125176e-05, + "loss": 0.863, + "step": 2745 + }, + { + "epoch": 0.15, + "learning_rate": 4.9703883995754766e-05, + "loss": 0.9755, + "step": 2750 + }, + { + "epoch": 0.15, + "learning_rate": 4.9702808371824136e-05, + "loss": 0.7829, + "step": 2755 + }, + { + "epoch": 0.15, + "learning_rate": 4.9701730809544286e-05, + "loss": 0.7883, + "step": 2760 + }, + { + "epoch": 0.15, + "learning_rate": 4.970065130899974e-05, + "loss": 0.9066, + "step": 2765 + }, + { + "epoch": 0.15, + "learning_rate": 4.9699569870275236e-05, + "loss": 0.8159, + "step": 2770 + }, + { + "epoch": 0.15, + "learning_rate": 4.969848649345561e-05, + "loss": 0.8148, + "step": 2775 + }, + { + "epoch": 0.15, + "learning_rate": 4.969740117862587e-05, + "loss": 0.7785, + "step": 2780 + }, + { + "epoch": 0.15, + "learning_rate": 4.969631392587119e-05, + "loss": 0.8492, + "step": 2785 + }, + { + "epoch": 0.15, + "learning_rate": 4.9695224735276874e-05, + "loss": 0.8909, + "step": 2790 + }, + { + "epoch": 0.15, + "learning_rate": 4.9694133606928386e-05, + "loss": 0.8065, + "step": 2795 + }, + { + "epoch": 0.15, + "learning_rate": 4.9693040540911353e-05, + "loss": 1.0437, + "step": 2800 + }, + { + "epoch": 0.15, + "learning_rate": 4.9691945537311536e-05, + "loss": 0.7663, + "step": 2805 + }, + { + "epoch": 0.15, + "learning_rate": 4.969084859621486e-05, + "loss": 0.761, + "step": 2810 + }, + { + "epoch": 0.15, + "learning_rate": 4.968974971770739e-05, + "loss": 0.8197, + "step": 2815 + }, + { + "epoch": 0.15, + "learning_rate": 4.9688648901875365e-05, + "loss": 0.8564, + "step": 2820 + }, + { + "epoch": 0.15, + "learning_rate": 4.968754614880515e-05, + "loss": 0.7359, + "step": 2825 + }, + { + "epoch": 0.15, + "learning_rate": 4.968644145858328e-05, + "loss": 0.8015, + "step": 2830 + }, + { + "epoch": 0.15, + "learning_rate": 4.968533483129644e-05, + "loss": 0.7678, + "step": 2835 + }, + { + "epoch": 0.15, + "learning_rate": 4.9684226267031466e-05, + "loss": 0.7663, + "step": 2840 + }, + { + "epoch": 0.15, + "learning_rate": 4.968311576587533e-05, + "loss": 0.8433, + "step": 2845 + }, + { + "epoch": 0.15, + "learning_rate": 4.968200332791518e-05, + "loss": 0.8641, + "step": 2850 + }, + { + "epoch": 0.15, + "learning_rate": 4.96808889532383e-05, + "loss": 0.7179, + "step": 2855 + }, + { + "epoch": 0.15, + "learning_rate": 4.9679772641932134e-05, + "loss": 0.7958, + "step": 2860 + }, + { + "epoch": 0.15, + "learning_rate": 4.967865439408428e-05, + "loss": 0.7732, + "step": 2865 + }, + { + "epoch": 0.15, + "learning_rate": 4.9677534209782475e-05, + "loss": 0.7677, + "step": 2870 + }, + { + "epoch": 0.15, + "learning_rate": 4.967641208911461e-05, + "loss": 0.954, + "step": 2875 + }, + { + "epoch": 0.15, + "learning_rate": 4.967528803216876e-05, + "loss": 0.9279, + "step": 2880 + }, + { + "epoch": 0.15, + "learning_rate": 4.96741620390331e-05, + "loss": 0.8701, + "step": 2885 + }, + { + "epoch": 0.15, + "learning_rate": 4.9673034109796e-05, + "loss": 0.813, + "step": 2890 + }, + { + "epoch": 0.15, + "learning_rate": 4.967190424454595e-05, + "loss": 0.9161, + "step": 2895 + }, + { + "epoch": 0.16, + "learning_rate": 4.967077244337161e-05, + "loss": 0.8249, + "step": 2900 + }, + { + "epoch": 0.16, + "learning_rate": 4.966963870636181e-05, + "loss": 0.8396, + "step": 2905 + }, + { + "epoch": 0.16, + "learning_rate": 4.9668503033605485e-05, + "loss": 0.792, + "step": 2910 + }, + { + "epoch": 0.16, + "learning_rate": 4.966736542519176e-05, + "loss": 0.706, + "step": 2915 + }, + { + "epoch": 0.16, + "learning_rate": 4.966622588120989e-05, + "loss": 0.7794, + "step": 2920 + }, + { + "epoch": 0.16, + "learning_rate": 4.966508440174931e-05, + "loss": 0.8807, + "step": 2925 + }, + { + "epoch": 0.16, + "learning_rate": 4.966394098689956e-05, + "loss": 0.8132, + "step": 2930 + }, + { + "epoch": 0.16, + "learning_rate": 4.966279563675038e-05, + "loss": 0.7731, + "step": 2935 + }, + { + "epoch": 0.16, + "learning_rate": 4.966164835139164e-05, + "loss": 0.8436, + "step": 2940 + }, + { + "epoch": 0.16, + "learning_rate": 4.966049913091336e-05, + "loss": 0.7004, + "step": 2945 + }, + { + "epoch": 0.16, + "learning_rate": 4.9659347975405726e-05, + "loss": 0.8035, + "step": 2950 + }, + { + "epoch": 0.16, + "learning_rate": 4.9658194884959046e-05, + "loss": 0.8535, + "step": 2955 + }, + { + "epoch": 0.16, + "learning_rate": 4.965703985966381e-05, + "loss": 0.7824, + "step": 2960 + }, + { + "epoch": 0.16, + "learning_rate": 4.965588289961066e-05, + "loss": 0.8664, + "step": 2965 + }, + { + "epoch": 0.16, + "learning_rate": 4.965472400489035e-05, + "loss": 0.7619, + "step": 2970 + }, + { + "epoch": 0.16, + "learning_rate": 4.965356317559384e-05, + "loss": 0.8276, + "step": 2975 + }, + { + "epoch": 0.16, + "learning_rate": 4.965240041181222e-05, + "loss": 0.8057, + "step": 2980 + }, + { + "epoch": 0.16, + "learning_rate": 4.96512357136367e-05, + "loss": 0.909, + "step": 2985 + }, + { + "epoch": 0.16, + "learning_rate": 4.96500690811587e-05, + "loss": 0.8557, + "step": 2990 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648900514469745e-05, + "loss": 0.765, + "step": 2995 + }, + { + "epoch": 0.16, + "learning_rate": 4.964773001366153e-05, + "loss": 1.0673, + "step": 3000 + }, + { + "epoch": 0.16, + "learning_rate": 4.96465575788259e-05, + "loss": 0.8582, + "step": 3005 + }, + { + "epoch": 0.16, + "learning_rate": 4.9645383210054865e-05, + "loss": 0.9613, + "step": 3010 + }, + { + "epoch": 0.16, + "learning_rate": 4.9644206907440563e-05, + "loss": 0.8421, + "step": 3015 + }, + { + "epoch": 0.16, + "learning_rate": 4.9643028671075285e-05, + "loss": 0.9535, + "step": 3020 + }, + { + "epoch": 0.16, + "learning_rate": 4.96418485010515e-05, + "loss": 0.9872, + "step": 3025 + }, + { + "epoch": 0.16, + "learning_rate": 4.964066639746181e-05, + "loss": 0.9017, + "step": 3030 + }, + { + "epoch": 0.16, + "learning_rate": 4.963948236039897e-05, + "loss": 0.8539, + "step": 3035 + }, + { + "epoch": 0.16, + "learning_rate": 4.963829638995588e-05, + "loss": 0.8314, + "step": 3040 + }, + { + "epoch": 0.16, + "learning_rate": 4.9637108486225604e-05, + "loss": 0.8791, + "step": 3045 + }, + { + "epoch": 0.16, + "learning_rate": 4.963591864930135e-05, + "loss": 0.7487, + "step": 3050 + }, + { + "epoch": 0.16, + "learning_rate": 4.963472687927649e-05, + "loss": 0.8403, + "step": 3055 + }, + { + "epoch": 0.16, + "learning_rate": 4.9633533176244526e-05, + "loss": 0.7483, + "step": 3060 + }, + { + "epoch": 0.16, + "learning_rate": 4.963233754029913e-05, + "loss": 0.8811, + "step": 3065 + }, + { + "epoch": 0.16, + "learning_rate": 4.9631139971534126e-05, + "loss": 0.8106, + "step": 3070 + }, + { + "epoch": 0.16, + "learning_rate": 4.962994047004348e-05, + "loss": 0.8174, + "step": 3075 + }, + { + "epoch": 0.16, + "learning_rate": 4.96287390359213e-05, + "loss": 0.7392, + "step": 3080 + }, + { + "epoch": 0.17, + "learning_rate": 4.962753566926187e-05, + "loss": 0.7308, + "step": 3085 + }, + { + "epoch": 0.17, + "learning_rate": 4.962633037015961e-05, + "loss": 0.7862, + "step": 3090 + }, + { + "epoch": 0.17, + "learning_rate": 4.962512313870911e-05, + "loss": 0.7249, + "step": 3095 + }, + { + "epoch": 0.17, + "learning_rate": 4.962391397500508e-05, + "loss": 0.7166, + "step": 3100 + }, + { + "epoch": 0.17, + "learning_rate": 4.96227028791424e-05, + "loss": 0.861, + "step": 3105 + }, + { + "epoch": 0.17, + "learning_rate": 4.962148985121612e-05, + "loss": 0.8061, + "step": 3110 + }, + { + "epoch": 0.17, + "learning_rate": 4.962027489132139e-05, + "loss": 0.7095, + "step": 3115 + }, + { + "epoch": 0.17, + "learning_rate": 4.961905799955357e-05, + "loss": 0.839, + "step": 3120 + }, + { + "epoch": 0.17, + "learning_rate": 4.961783917600815e-05, + "loss": 0.8933, + "step": 3125 + }, + { + "epoch": 0.17, + "learning_rate": 4.9616618420780745e-05, + "loss": 0.8229, + "step": 3130 + }, + { + "epoch": 0.17, + "learning_rate": 4.961539573396715e-05, + "loss": 0.8342, + "step": 3135 + }, + { + "epoch": 0.17, + "learning_rate": 4.961417111566332e-05, + "loss": 0.7522, + "step": 3140 + }, + { + "epoch": 0.17, + "learning_rate": 4.961294456596533e-05, + "loss": 0.7861, + "step": 3145 + }, + { + "epoch": 0.17, + "learning_rate": 4.961171608496943e-05, + "loss": 0.7584, + "step": 3150 + }, + { + "epoch": 0.17, + "learning_rate": 4.961048567277201e-05, + "loss": 0.8426, + "step": 3155 + }, + { + "epoch": 0.17, + "learning_rate": 4.960925332946963e-05, + "loss": 0.8247, + "step": 3160 + }, + { + "epoch": 0.17, + "learning_rate": 4.960801905515897e-05, + "loss": 0.7422, + "step": 3165 + }, + { + "epoch": 0.17, + "learning_rate": 4.960678284993689e-05, + "loss": 0.7993, + "step": 3170 + }, + { + "epoch": 0.17, + "learning_rate": 4.96055447139004e-05, + "loss": 0.746, + "step": 3175 + }, + { + "epoch": 0.17, + "learning_rate": 4.960430464714663e-05, + "loss": 0.8515, + "step": 3180 + }, + { + "epoch": 0.17, + "learning_rate": 4.9603062649772895e-05, + "loss": 0.7546, + "step": 3185 + }, + { + "epoch": 0.17, + "learning_rate": 4.960181872187666e-05, + "loss": 0.7612, + "step": 3190 + }, + { + "epoch": 0.17, + "learning_rate": 4.9600572863555515e-05, + "loss": 0.8212, + "step": 3195 + }, + { + "epoch": 0.17, + "learning_rate": 4.959932507490723e-05, + "loss": 0.8165, + "step": 3200 + }, + { + "epoch": 0.17, + "learning_rate": 4.9598075356029713e-05, + "loss": 0.8134, + "step": 3205 + }, + { + "epoch": 0.17, + "learning_rate": 4.9596823707021026e-05, + "loss": 0.8584, + "step": 3210 + }, + { + "epoch": 0.17, + "learning_rate": 4.9595570127979375e-05, + "loss": 0.8665, + "step": 3215 + }, + { + "epoch": 0.17, + "learning_rate": 4.959431461900313e-05, + "loss": 0.8252, + "step": 3220 + }, + { + "epoch": 0.17, + "learning_rate": 4.95930571801908e-05, + "loss": 0.7456, + "step": 3225 + }, + { + "epoch": 0.17, + "learning_rate": 4.959179781164107e-05, + "loss": 0.7836, + "step": 3230 + }, + { + "epoch": 0.17, + "learning_rate": 4.959053651345275e-05, + "loss": 0.9068, + "step": 3235 + }, + { + "epoch": 0.17, + "learning_rate": 4.958927328572479e-05, + "loss": 0.9326, + "step": 3240 + }, + { + "epoch": 0.17, + "learning_rate": 4.95882613143405e-05, + "loss": 0.8543, + "step": 3245 + }, + { + "epoch": 0.17, + "learning_rate": 4.958699461369112e-05, + "loss": 0.845, + "step": 3250 + }, + { + "epoch": 0.17, + "learning_rate": 4.958572598378003e-05, + "loss": 0.765, + "step": 3255 + }, + { + "epoch": 0.17, + "learning_rate": 4.958445542470679e-05, + "loss": 0.8575, + "step": 3260 + }, + { + "epoch": 0.17, + "learning_rate": 4.958318293657108e-05, + "loss": 0.8214, + "step": 3265 + }, + { + "epoch": 0.17, + "learning_rate": 4.9581908519472766e-05, + "loss": 0.9318, + "step": 3270 + }, + { + "epoch": 0.18, + "learning_rate": 4.958063217351183e-05, + "loss": 0.8527, + "step": 3275 + }, + { + "epoch": 0.18, + "learning_rate": 4.9579353898788435e-05, + "loss": 0.9015, + "step": 3280 + }, + { + "epoch": 0.18, + "learning_rate": 4.957807369540289e-05, + "loss": 0.9015, + "step": 3285 + }, + { + "epoch": 0.18, + "learning_rate": 4.9576791563455625e-05, + "loss": 0.789, + "step": 3290 + }, + { + "epoch": 0.18, + "learning_rate": 4.957550750304727e-05, + "loss": 0.8802, + "step": 3295 + }, + { + "epoch": 0.18, + "learning_rate": 4.957422151427856e-05, + "loss": 0.8692, + "step": 3300 + }, + { + "epoch": 0.18, + "learning_rate": 4.9572933597250424e-05, + "loss": 0.9522, + "step": 3305 + }, + { + "epoch": 0.18, + "learning_rate": 4.9571643752063894e-05, + "loss": 0.8021, + "step": 3310 + }, + { + "epoch": 0.18, + "learning_rate": 4.95703519788202e-05, + "loss": 0.8531, + "step": 3315 + }, + { + "epoch": 0.18, + "learning_rate": 4.95690582776207e-05, + "loss": 1.0268, + "step": 3320 + }, + { + "epoch": 0.18, + "learning_rate": 4.95677626485669e-05, + "loss": 0.6887, + "step": 3325 + }, + { + "epoch": 0.18, + "learning_rate": 4.956646509176047e-05, + "loss": 0.6657, + "step": 3330 + }, + { + "epoch": 0.18, + "learning_rate": 4.9565165607303225e-05, + "loss": 0.6664, + "step": 3335 + }, + { + "epoch": 0.18, + "learning_rate": 4.956386419529713e-05, + "loss": 0.8534, + "step": 3340 + }, + { + "epoch": 0.18, + "learning_rate": 4.9562560855844295e-05, + "loss": 0.7976, + "step": 3345 + }, + { + "epoch": 0.18, + "learning_rate": 4.9561255589047e-05, + "loss": 0.7612, + "step": 3350 + }, + { + "epoch": 0.18, + "learning_rate": 4.955994839500766e-05, + "loss": 0.8446, + "step": 3355 + }, + { + "epoch": 0.18, + "learning_rate": 4.955863927382885e-05, + "loss": 0.7642, + "step": 3360 + }, + { + "epoch": 0.18, + "learning_rate": 4.955732822561329e-05, + "loss": 0.7925, + "step": 3365 + }, + { + "epoch": 0.18, + "learning_rate": 4.955601525046385e-05, + "loss": 0.9055, + "step": 3370 + }, + { + "epoch": 0.18, + "learning_rate": 4.955470034848355e-05, + "loss": 0.6796, + "step": 3375 + }, + { + "epoch": 0.18, + "learning_rate": 4.955338351977559e-05, + "loss": 0.8158, + "step": 3380 + }, + { + "epoch": 0.18, + "learning_rate": 4.9552064764443275e-05, + "loss": 0.8094, + "step": 3385 + }, + { + "epoch": 0.18, + "learning_rate": 4.955074408259008e-05, + "loss": 0.8934, + "step": 3390 + }, + { + "epoch": 0.18, + "learning_rate": 4.9549421474319654e-05, + "loss": 0.8261, + "step": 3395 + }, + { + "epoch": 0.18, + "learning_rate": 4.954809693973578e-05, + "loss": 0.9384, + "step": 3400 + }, + { + "epoch": 0.18, + "learning_rate": 4.954677047894236e-05, + "loss": 0.9363, + "step": 3405 + }, + { + "epoch": 0.18, + "learning_rate": 4.95454420920435e-05, + "loss": 0.9239, + "step": 3410 + }, + { + "epoch": 0.18, + "learning_rate": 4.9544111779143435e-05, + "loss": 0.9831, + "step": 3415 + }, + { + "epoch": 0.18, + "learning_rate": 4.954277954034654e-05, + "loss": 0.7436, + "step": 3420 + }, + { + "epoch": 0.18, + "learning_rate": 4.954144537575736e-05, + "loss": 0.8792, + "step": 3425 + }, + { + "epoch": 0.18, + "learning_rate": 4.954010928548057e-05, + "loss": 0.8038, + "step": 3430 + }, + { + "epoch": 0.18, + "learning_rate": 4.9538771269621024e-05, + "loss": 0.9046, + "step": 3435 + }, + { + "epoch": 0.18, + "learning_rate": 4.953743132828371e-05, + "loss": 0.8244, + "step": 3440 + }, + { + "epoch": 0.18, + "learning_rate": 4.953608946157376e-05, + "loss": 0.9734, + "step": 3445 + }, + { + "epoch": 0.18, + "learning_rate": 4.953474566959647e-05, + "loss": 0.7814, + "step": 3450 + }, + { + "epoch": 0.18, + "learning_rate": 4.9533399952457276e-05, + "loss": 0.8054, + "step": 3455 + }, + { + "epoch": 0.19, + "learning_rate": 4.953205231026179e-05, + "loss": 0.7926, + "step": 3460 + }, + { + "epoch": 0.19, + "learning_rate": 4.953070274311574e-05, + "loss": 0.8162, + "step": 3465 + }, + { + "epoch": 0.19, + "learning_rate": 4.9529351251125035e-05, + "loss": 0.7372, + "step": 3470 + }, + { + "epoch": 0.19, + "learning_rate": 4.9527997834395706e-05, + "loss": 0.8506, + "step": 3475 + }, + { + "epoch": 0.19, + "learning_rate": 4.952664249303397e-05, + "loss": 1.0776, + "step": 3480 + }, + { + "epoch": 0.19, + "learning_rate": 4.952528522714616e-05, + "loss": 0.8315, + "step": 3485 + }, + { + "epoch": 0.19, + "learning_rate": 4.9523926036838784e-05, + "loss": 0.9055, + "step": 3490 + }, + { + "epoch": 0.19, + "learning_rate": 4.952256492221849e-05, + "loss": 0.7596, + "step": 3495 + }, + { + "epoch": 0.19, + "learning_rate": 4.952120188339209e-05, + "loss": 0.7789, + "step": 3500 + }, + { + "epoch": 0.19, + "learning_rate": 4.9519836920466535e-05, + "loss": 0.8056, + "step": 3505 + }, + { + "epoch": 0.19, + "learning_rate": 4.951847003354892e-05, + "loss": 0.7515, + "step": 3510 + }, + { + "epoch": 0.19, + "learning_rate": 4.95171012227465e-05, + "loss": 0.8561, + "step": 3515 + }, + { + "epoch": 0.19, + "learning_rate": 4.951573048816668e-05, + "loss": 0.8129, + "step": 3520 + }, + { + "epoch": 0.19, + "learning_rate": 4.951435782991704e-05, + "loss": 0.716, + "step": 3525 + }, + { + "epoch": 0.19, + "learning_rate": 4.951298324810525e-05, + "loss": 0.8735, + "step": 3530 + }, + { + "epoch": 0.19, + "learning_rate": 4.95116067428392e-05, + "loss": 0.8052, + "step": 3535 + }, + { + "epoch": 0.19, + "learning_rate": 4.951022831422689e-05, + "loss": 0.8611, + "step": 3540 + }, + { + "epoch": 0.19, + "learning_rate": 4.9508847962376484e-05, + "loss": 0.8507, + "step": 3545 + }, + { + "epoch": 0.19, + "learning_rate": 4.9507465687396284e-05, + "loss": 0.8489, + "step": 3550 + }, + { + "epoch": 0.19, + "learning_rate": 4.9506081489394764e-05, + "loss": 0.8963, + "step": 3555 + }, + { + "epoch": 0.19, + "learning_rate": 4.950469536848053e-05, + "loss": 0.8285, + "step": 3560 + }, + { + "epoch": 0.19, + "learning_rate": 4.950330732476235e-05, + "loss": 0.6962, + "step": 3565 + }, + { + "epoch": 0.19, + "learning_rate": 4.9501917358349134e-05, + "loss": 0.8291, + "step": 3570 + }, + { + "epoch": 0.19, + "learning_rate": 4.9500525469349955e-05, + "loss": 0.9397, + "step": 3575 + }, + { + "epoch": 0.19, + "learning_rate": 4.949913165787403e-05, + "loss": 0.8595, + "step": 3580 + }, + { + "epoch": 0.19, + "learning_rate": 4.949773592403072e-05, + "loss": 0.8645, + "step": 3585 + }, + { + "epoch": 0.19, + "learning_rate": 4.949633826792955e-05, + "loss": 0.6876, + "step": 3590 + }, + { + "epoch": 0.19, + "learning_rate": 4.949493868968019e-05, + "loss": 0.6638, + "step": 3595 + }, + { + "epoch": 0.19, + "learning_rate": 4.9493537189392445e-05, + "loss": 0.9154, + "step": 3600 + }, + { + "epoch": 0.19, + "learning_rate": 4.949213376717631e-05, + "loss": 0.6988, + "step": 3605 + }, + { + "epoch": 0.19, + "learning_rate": 4.9490728423141896e-05, + "loss": 0.716, + "step": 3610 + }, + { + "epoch": 0.19, + "learning_rate": 4.9489321157399475e-05, + "loss": 0.7978, + "step": 3615 + }, + { + "epoch": 0.19, + "learning_rate": 4.948791197005947e-05, + "loss": 0.866, + "step": 3620 + }, + { + "epoch": 0.19, + "learning_rate": 4.948650086123245e-05, + "loss": 0.9086, + "step": 3625 + }, + { + "epoch": 0.19, + "learning_rate": 4.948508783102916e-05, + "loss": 0.894, + "step": 3630 + }, + { + "epoch": 0.19, + "learning_rate": 4.9483672879560454e-05, + "loss": 0.8542, + "step": 3635 + }, + { + "epoch": 0.19, + "learning_rate": 4.948225600693737e-05, + "loss": 0.8554, + "step": 3640 + }, + { + "epoch": 0.19, + "learning_rate": 4.948083721327108e-05, + "loss": 0.8713, + "step": 3645 + }, + { + "epoch": 0.2, + "learning_rate": 4.947941649867292e-05, + "loss": 0.897, + "step": 3650 + }, + { + "epoch": 0.2, + "learning_rate": 4.947799386325436e-05, + "loss": 0.8087, + "step": 3655 + }, + { + "epoch": 0.2, + "learning_rate": 4.947656930712703e-05, + "loss": 0.8883, + "step": 3660 + }, + { + "epoch": 0.2, + "learning_rate": 4.947514283040272e-05, + "loss": 0.8465, + "step": 3665 + }, + { + "epoch": 0.2, + "learning_rate": 4.947371443319335e-05, + "loss": 0.8383, + "step": 3670 + }, + { + "epoch": 0.2, + "learning_rate": 4.9472284115611005e-05, + "loss": 0.8494, + "step": 3675 + }, + { + "epoch": 0.2, + "learning_rate": 4.947085187776792e-05, + "loss": 0.7423, + "step": 3680 + }, + { + "epoch": 0.2, + "learning_rate": 4.9469417719776477e-05, + "loss": 0.7621, + "step": 3685 + }, + { + "epoch": 0.2, + "learning_rate": 4.9467981641749216e-05, + "loss": 0.7575, + "step": 3690 + }, + { + "epoch": 0.2, + "learning_rate": 4.9466543643798805e-05, + "loss": 0.8, + "step": 3695 + }, + { + "epoch": 0.2, + "learning_rate": 4.9465103726038096e-05, + "loss": 0.7279, + "step": 3700 + }, + { + "epoch": 0.2, + "learning_rate": 4.946366188858006e-05, + "loss": 0.9188, + "step": 3705 + }, + { + "epoch": 0.2, + "learning_rate": 4.9462218131537845e-05, + "loss": 0.901, + "step": 3710 + }, + { + "epoch": 0.2, + "learning_rate": 4.946077245502474e-05, + "loss": 0.8387, + "step": 3715 + }, + { + "epoch": 0.2, + "learning_rate": 4.9459324859154167e-05, + "loss": 0.7089, + "step": 3720 + }, + { + "epoch": 0.2, + "learning_rate": 4.945787534403973e-05, + "loss": 0.8077, + "step": 3725 + }, + { + "epoch": 0.2, + "learning_rate": 4.945642390979516e-05, + "loss": 0.8124, + "step": 3730 + }, + { + "epoch": 0.2, + "learning_rate": 4.945497055653434e-05, + "loss": 0.8187, + "step": 3735 + }, + { + "epoch": 0.2, + "learning_rate": 4.945351528437132e-05, + "loss": 0.8432, + "step": 3740 + }, + { + "epoch": 0.2, + "learning_rate": 4.945205809342029e-05, + "loss": 1.0281, + "step": 3745 + }, + { + "epoch": 0.2, + "learning_rate": 4.945059898379559e-05, + "loss": 0.946, + "step": 3750 + }, + { + "epoch": 0.2, + "learning_rate": 4.944913795561171e-05, + "loss": 0.9212, + "step": 3755 + }, + { + "epoch": 0.2, + "learning_rate": 4.9447675008983295e-05, + "loss": 0.9404, + "step": 3760 + }, + { + "epoch": 0.2, + "learning_rate": 4.9446210144025134e-05, + "loss": 0.8842, + "step": 3765 + }, + { + "epoch": 0.2, + "learning_rate": 4.944474336085217e-05, + "loss": 0.7175, + "step": 3770 + }, + { + "epoch": 0.2, + "learning_rate": 4.94432746595795e-05, + "loss": 0.8221, + "step": 3775 + }, + { + "epoch": 0.2, + "learning_rate": 4.944180404032236e-05, + "loss": 0.6941, + "step": 3780 + }, + { + "epoch": 0.2, + "learning_rate": 4.9440331503196165e-05, + "loss": 0.9288, + "step": 3785 + }, + { + "epoch": 0.2, + "learning_rate": 4.9438857048316434e-05, + "loss": 0.7445, + "step": 3790 + }, + { + "epoch": 0.2, + "learning_rate": 4.943738067579888e-05, + "loss": 0.8878, + "step": 3795 + }, + { + "epoch": 0.2, + "learning_rate": 4.943590238575935e-05, + "loss": 0.8777, + "step": 3800 + }, + { + "epoch": 0.2, + "learning_rate": 4.943442217831382e-05, + "loss": 0.9116, + "step": 3805 + }, + { + "epoch": 0.2, + "learning_rate": 4.943294005357846e-05, + "loss": 0.9225, + "step": 3810 + }, + { + "epoch": 0.2, + "learning_rate": 4.943145601166956e-05, + "loss": 0.8456, + "step": 3815 + }, + { + "epoch": 0.2, + "learning_rate": 4.942997005270357e-05, + "loss": 0.7643, + "step": 3820 + }, + { + "epoch": 0.2, + "learning_rate": 4.942848217679709e-05, + "loss": 0.7678, + "step": 3825 + }, + { + "epoch": 0.2, + "learning_rate": 4.942699238406685e-05, + "loss": 0.796, + "step": 3830 + }, + { + "epoch": 0.21, + "learning_rate": 4.9425500674629775e-05, + "loss": 0.9026, + "step": 3835 + }, + { + "epoch": 0.21, + "learning_rate": 4.9424007048602905e-05, + "loss": 0.8451, + "step": 3840 + }, + { + "epoch": 0.21, + "learning_rate": 4.942251150610343e-05, + "loss": 0.8219, + "step": 3845 + }, + { + "epoch": 0.21, + "learning_rate": 4.9421014047248716e-05, + "loss": 0.758, + "step": 3850 + }, + { + "epoch": 0.21, + "learning_rate": 4.9419514672156255e-05, + "loss": 0.8383, + "step": 3855 + }, + { + "epoch": 0.21, + "learning_rate": 4.941801338094369e-05, + "loss": 0.858, + "step": 3860 + }, + { + "epoch": 0.21, + "learning_rate": 4.941651017372885e-05, + "loss": 0.8233, + "step": 3865 + }, + { + "epoch": 0.21, + "learning_rate": 4.9415005050629654e-05, + "loss": 0.9621, + "step": 3870 + }, + { + "epoch": 0.21, + "learning_rate": 4.941349801176422e-05, + "loss": 0.9742, + "step": 3875 + }, + { + "epoch": 0.21, + "learning_rate": 4.9411989057250805e-05, + "loss": 0.8342, + "step": 3880 + }, + { + "epoch": 0.21, + "learning_rate": 4.941047818720781e-05, + "loss": 0.7426, + "step": 3885 + }, + { + "epoch": 0.21, + "learning_rate": 4.9408965401753776e-05, + "loss": 0.857, + "step": 3890 + }, + { + "epoch": 0.21, + "learning_rate": 4.9407450701007416e-05, + "loss": 0.8559, + "step": 3895 + }, + { + "epoch": 0.21, + "learning_rate": 4.9405934085087584e-05, + "loss": 0.7515, + "step": 3900 + }, + { + "epoch": 0.21, + "learning_rate": 4.940441555411328e-05, + "loss": 0.8281, + "step": 3905 + }, + { + "epoch": 0.21, + "learning_rate": 4.940289510820367e-05, + "loss": 0.9546, + "step": 3910 + }, + { + "epoch": 0.21, + "learning_rate": 4.9401372747478035e-05, + "loss": 0.8277, + "step": 3915 + }, + { + "epoch": 0.21, + "learning_rate": 4.939984847205585e-05, + "loss": 0.9051, + "step": 3920 + }, + { + "epoch": 0.21, + "learning_rate": 4.939832228205672e-05, + "loss": 0.9353, + "step": 3925 + }, + { + "epoch": 0.21, + "learning_rate": 4.9396794177600384e-05, + "loss": 0.928, + "step": 3930 + }, + { + "epoch": 0.21, + "learning_rate": 4.9395264158806764e-05, + "loss": 0.7046, + "step": 3935 + }, + { + "epoch": 0.21, + "learning_rate": 4.9393732225795906e-05, + "loss": 0.7513, + "step": 3940 + }, + { + "epoch": 0.21, + "learning_rate": 4.939219837868801e-05, + "loss": 0.7943, + "step": 3945 + }, + { + "epoch": 0.21, + "learning_rate": 4.939066261760346e-05, + "loss": 0.7632, + "step": 3950 + }, + { + "epoch": 0.21, + "learning_rate": 4.9389124942662727e-05, + "loss": 0.7753, + "step": 3955 + }, + { + "epoch": 0.21, + "learning_rate": 4.938758535398649e-05, + "loss": 0.8438, + "step": 3960 + }, + { + "epoch": 0.21, + "learning_rate": 4.938604385169555e-05, + "loss": 0.8776, + "step": 3965 + }, + { + "epoch": 0.21, + "learning_rate": 4.938450043591085e-05, + "loss": 0.6746, + "step": 3970 + }, + { + "epoch": 0.21, + "learning_rate": 4.938295510675352e-05, + "loss": 0.7182, + "step": 3975 + }, + { + "epoch": 0.21, + "learning_rate": 4.938140786434481e-05, + "loss": 0.9462, + "step": 3980 + }, + { + "epoch": 0.21, + "learning_rate": 4.937985870880612e-05, + "loss": 0.8194, + "step": 3985 + }, + { + "epoch": 0.21, + "learning_rate": 4.9378307640259005e-05, + "loss": 1.0525, + "step": 3990 + }, + { + "epoch": 0.21, + "learning_rate": 4.937675465882517e-05, + "loss": 0.8388, + "step": 3995 + }, + { + "epoch": 0.21, + "learning_rate": 4.93751997646265e-05, + "loss": 0.8251, + "step": 4000 + }, + { + "epoch": 0.21, + "learning_rate": 4.937364295778497e-05, + "loss": 0.7671, + "step": 4005 + }, + { + "epoch": 0.21, + "learning_rate": 4.937208423842276e-05, + "loss": 1.005, + "step": 4010 + }, + { + "epoch": 0.21, + "learning_rate": 4.9370523606662155e-05, + "loss": 0.9077, + "step": 4015 + }, + { + "epoch": 0.22, + "learning_rate": 4.9368961062625626e-05, + "loss": 0.943, + "step": 4020 + }, + { + "epoch": 0.22, + "learning_rate": 4.9367396606435787e-05, + "loss": 0.7335, + "step": 4025 + }, + { + "epoch": 0.22, + "learning_rate": 4.9365830238215395e-05, + "loss": 0.8139, + "step": 4030 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364261958087346e-05, + "loss": 0.8767, + "step": 4035 + }, + { + "epoch": 0.22, + "learning_rate": 4.9362691766174704e-05, + "loss": 0.8758, + "step": 4040 + }, + { + "epoch": 0.22, + "learning_rate": 4.9361119662600674e-05, + "loss": 0.8697, + "step": 4045 + }, + { + "epoch": 0.22, + "learning_rate": 4.9359545647488624e-05, + "loss": 0.8138, + "step": 4050 + }, + { + "epoch": 0.22, + "learning_rate": 4.935796972096205e-05, + "loss": 0.8757, + "step": 4055 + }, + { + "epoch": 0.22, + "learning_rate": 4.935639188314461e-05, + "loss": 0.7957, + "step": 4060 + }, + { + "epoch": 0.22, + "learning_rate": 4.935481213416012e-05, + "loss": 0.8402, + "step": 4065 + }, + { + "epoch": 0.22, + "learning_rate": 4.9353230474132536e-05, + "loss": 0.8074, + "step": 4070 + }, + { + "epoch": 0.22, + "learning_rate": 4.935164690318596e-05, + "loss": 0.6974, + "step": 4075 + }, + { + "epoch": 0.22, + "learning_rate": 4.935006142144465e-05, + "loss": 0.8779, + "step": 4080 + }, + { + "epoch": 0.22, + "learning_rate": 4.934847402903303e-05, + "loss": 0.702, + "step": 4085 + }, + { + "epoch": 0.22, + "learning_rate": 4.934688472607563e-05, + "loss": 0.8912, + "step": 4090 + }, + { + "epoch": 0.22, + "learning_rate": 4.9345293512697175e-05, + "loss": 0.8372, + "step": 4095 + }, + { + "epoch": 0.22, + "learning_rate": 4.934370038902253e-05, + "loss": 0.7317, + "step": 4100 + }, + { + "epoch": 0.22, + "learning_rate": 4.934210535517668e-05, + "loss": 0.901, + "step": 4105 + }, + { + "epoch": 0.22, + "learning_rate": 4.9340508411284804e-05, + "loss": 0.7522, + "step": 4110 + }, + { + "epoch": 0.22, + "learning_rate": 4.933890955747219e-05, + "loss": 0.7643, + "step": 4115 + }, + { + "epoch": 0.22, + "learning_rate": 4.93373087938643e-05, + "loss": 0.866, + "step": 4120 + }, + { + "epoch": 0.22, + "learning_rate": 4.9335706120586754e-05, + "loss": 0.8287, + "step": 4125 + }, + { + "epoch": 0.22, + "learning_rate": 4.9334101537765296e-05, + "loss": 0.769, + "step": 4130 + }, + { + "epoch": 0.22, + "learning_rate": 4.933249504552583e-05, + "loss": 0.9201, + "step": 4135 + }, + { + "epoch": 0.22, + "learning_rate": 4.933088664399442e-05, + "loss": 0.6929, + "step": 4140 + }, + { + "epoch": 0.22, + "learning_rate": 4.932927633329727e-05, + "loss": 0.7547, + "step": 4145 + }, + { + "epoch": 0.22, + "learning_rate": 4.932766411356074e-05, + "loss": 0.7361, + "step": 4150 + }, + { + "epoch": 0.22, + "learning_rate": 4.9326049984911326e-05, + "loss": 0.7689, + "step": 4155 + }, + { + "epoch": 0.22, + "learning_rate": 4.932443394747569e-05, + "loss": 0.7908, + "step": 4160 + }, + { + "epoch": 0.22, + "learning_rate": 4.9322816001380635e-05, + "loss": 0.9189, + "step": 4165 + }, + { + "epoch": 0.22, + "learning_rate": 4.9321196146753114e-05, + "loss": 0.8301, + "step": 4170 + }, + { + "epoch": 0.22, + "learning_rate": 4.931957438372024e-05, + "loss": 0.8996, + "step": 4175 + }, + { + "epoch": 0.22, + "learning_rate": 4.931795071240926e-05, + "loss": 0.764, + "step": 4180 + }, + { + "epoch": 0.22, + "learning_rate": 4.931632513294758e-05, + "loss": 0.803, + "step": 4185 + }, + { + "epoch": 0.22, + "learning_rate": 4.931469764546275e-05, + "loss": 0.9746, + "step": 4190 + }, + { + "epoch": 0.22, + "learning_rate": 4.9313068250082486e-05, + "loss": 0.8741, + "step": 4195 + }, + { + "epoch": 0.22, + "learning_rate": 4.9311436946934624e-05, + "loss": 0.9009, + "step": 4200 + }, + { + "epoch": 0.22, + "learning_rate": 4.930980373614718e-05, + "loss": 0.7202, + "step": 4205 + }, + { + "epoch": 0.23, + "learning_rate": 4.9308168617848304e-05, + "loss": 0.8097, + "step": 4210 + }, + { + "epoch": 0.23, + "learning_rate": 4.9306531592166294e-05, + "loss": 0.6738, + "step": 4215 + }, + { + "epoch": 0.23, + "learning_rate": 4.930489265922961e-05, + "loss": 0.7737, + "step": 4220 + }, + { + "epoch": 0.23, + "learning_rate": 4.930325181916684e-05, + "loss": 0.8228, + "step": 4225 + }, + { + "epoch": 0.23, + "learning_rate": 4.9301609072106756e-05, + "loss": 0.8239, + "step": 4230 + }, + { + "epoch": 0.23, + "learning_rate": 4.929996441817825e-05, + "loss": 0.9161, + "step": 4235 + }, + { + "epoch": 0.23, + "learning_rate": 4.9298317857510354e-05, + "loss": 0.9567, + "step": 4240 + }, + { + "epoch": 0.23, + "learning_rate": 4.9296669390232294e-05, + "loss": 0.8271, + "step": 4245 + }, + { + "epoch": 0.23, + "learning_rate": 4.929501901647341e-05, + "loss": 0.6943, + "step": 4250 + }, + { + "epoch": 0.23, + "learning_rate": 4.9293366736363205e-05, + "loss": 0.6149, + "step": 4255 + }, + { + "epoch": 0.23, + "learning_rate": 4.929171255003132e-05, + "loss": 0.7123, + "step": 4260 + }, + { + "epoch": 0.23, + "learning_rate": 4.929005645760756e-05, + "loss": 0.8766, + "step": 4265 + }, + { + "epoch": 0.23, + "learning_rate": 4.9288398459221875e-05, + "loss": 0.9577, + "step": 4270 + }, + { + "epoch": 0.23, + "learning_rate": 4.928673855500435e-05, + "loss": 0.7385, + "step": 4275 + }, + { + "epoch": 0.23, + "learning_rate": 4.9285076745085256e-05, + "loss": 0.8711, + "step": 4280 + }, + { + "epoch": 0.23, + "learning_rate": 4.9283413029594964e-05, + "loss": 0.8776, + "step": 4285 + }, + { + "epoch": 0.23, + "learning_rate": 4.928174740866404e-05, + "loss": 0.8267, + "step": 4290 + }, + { + "epoch": 0.23, + "learning_rate": 4.928007988242317e-05, + "loss": 0.7509, + "step": 4295 + }, + { + "epoch": 0.23, + "learning_rate": 4.92784104510032e-05, + "loss": 1.0717, + "step": 4300 + }, + { + "epoch": 0.23, + "learning_rate": 4.927673911453512e-05, + "loss": 0.7941, + "step": 4305 + }, + { + "epoch": 0.23, + "learning_rate": 4.9275065873150086e-05, + "loss": 0.8065, + "step": 4310 + }, + { + "epoch": 0.23, + "learning_rate": 4.9273390726979384e-05, + "loss": 0.7789, + "step": 4315 + }, + { + "epoch": 0.23, + "learning_rate": 4.9271713676154465e-05, + "loss": 0.8482, + "step": 4320 + }, + { + "epoch": 0.23, + "learning_rate": 4.927003472080691e-05, + "loss": 0.9035, + "step": 4325 + }, + { + "epoch": 0.23, + "learning_rate": 4.9268353861068475e-05, + "loss": 0.8393, + "step": 4330 + }, + { + "epoch": 0.23, + "learning_rate": 4.926667109707104e-05, + "loss": 1.0194, + "step": 4335 + }, + { + "epoch": 0.23, + "learning_rate": 4.9264986428946644e-05, + "loss": 0.8048, + "step": 4340 + }, + { + "epoch": 0.23, + "learning_rate": 4.926329985682749e-05, + "loss": 0.8594, + "step": 4345 + }, + { + "epoch": 0.23, + "learning_rate": 4.9261611380845915e-05, + "loss": 0.8202, + "step": 4350 + }, + { + "epoch": 0.23, + "learning_rate": 4.9259921001134404e-05, + "loss": 0.8064, + "step": 4355 + }, + { + "epoch": 0.23, + "learning_rate": 4.925822871782559e-05, + "loss": 0.804, + "step": 4360 + }, + { + "epoch": 0.23, + "learning_rate": 4.9256534531052265e-05, + "loss": 0.9993, + "step": 4365 + }, + { + "epoch": 0.23, + "learning_rate": 4.9254838440947374e-05, + "loss": 0.904, + "step": 4370 + }, + { + "epoch": 0.23, + "learning_rate": 4.9253140447644e-05, + "loss": 0.8427, + "step": 4375 + }, + { + "epoch": 0.23, + "learning_rate": 4.925144055127537e-05, + "loss": 0.8224, + "step": 4380 + }, + { + "epoch": 0.23, + "learning_rate": 4.9249738751974886e-05, + "loss": 0.9162, + "step": 4385 + }, + { + "epoch": 0.23, + "learning_rate": 4.9248035049876074e-05, + "loss": 0.8402, + "step": 4390 + }, + { + "epoch": 0.24, + "learning_rate": 4.9246329445112604e-05, + "loss": 0.7632, + "step": 4395 + }, + { + "epoch": 0.24, + "learning_rate": 4.9244621937818335e-05, + "loss": 0.73, + "step": 4400 + }, + { + "epoch": 0.24, + "learning_rate": 4.924291252812723e-05, + "loss": 0.747, + "step": 4405 + }, + { + "epoch": 0.24, + "learning_rate": 4.9241201216173435e-05, + "loss": 0.7069, + "step": 4410 + }, + { + "epoch": 0.24, + "learning_rate": 4.923948800209122e-05, + "loss": 0.811, + "step": 4415 + }, + { + "epoch": 0.24, + "learning_rate": 4.923777288601502e-05, + "loss": 0.8599, + "step": 4420 + }, + { + "epoch": 0.24, + "learning_rate": 4.923605586807941e-05, + "loss": 0.8569, + "step": 4425 + }, + { + "epoch": 0.24, + "learning_rate": 4.923433694841913e-05, + "loss": 0.8376, + "step": 4430 + }, + { + "epoch": 0.24, + "learning_rate": 4.9232616127169045e-05, + "loss": 0.6783, + "step": 4435 + }, + { + "epoch": 0.24, + "learning_rate": 4.923089340446419e-05, + "loss": 0.7608, + "step": 4440 + }, + { + "epoch": 0.24, + "learning_rate": 4.922916878043974e-05, + "loss": 0.9222, + "step": 4445 + }, + { + "epoch": 0.24, + "learning_rate": 4.922744225523102e-05, + "loss": 0.8178, + "step": 4450 + }, + { + "epoch": 0.24, + "learning_rate": 4.922571382897351e-05, + "loss": 0.7488, + "step": 4455 + }, + { + "epoch": 0.24, + "learning_rate": 4.922398350180282e-05, + "loss": 0.9123, + "step": 4460 + }, + { + "epoch": 0.24, + "learning_rate": 4.9222251273854734e-05, + "loss": 0.7817, + "step": 4465 + }, + { + "epoch": 0.24, + "learning_rate": 4.9220517145265175e-05, + "loss": 0.8018, + "step": 4470 + }, + { + "epoch": 0.24, + "learning_rate": 4.9218781116170215e-05, + "loss": 0.6371, + "step": 4475 + }, + { + "epoch": 0.24, + "learning_rate": 4.921704318670607e-05, + "loss": 0.8798, + "step": 4480 + }, + { + "epoch": 0.24, + "learning_rate": 4.921530335700911e-05, + "loss": 0.789, + "step": 4485 + }, + { + "epoch": 0.24, + "learning_rate": 4.9213561627215855e-05, + "loss": 0.8795, + "step": 4490 + }, + { + "epoch": 0.24, + "learning_rate": 4.921181799746297e-05, + "loss": 0.7122, + "step": 4495 + }, + { + "epoch": 0.24, + "learning_rate": 4.921007246788728e-05, + "loss": 0.7823, + "step": 4500 + }, + { + "epoch": 0.24, + "learning_rate": 4.920832503862575e-05, + "loss": 0.8281, + "step": 4505 + }, + { + "epoch": 0.24, + "learning_rate": 4.920657570981548e-05, + "loss": 0.8589, + "step": 4510 + }, + { + "epoch": 0.24, + "learning_rate": 4.920482448159375e-05, + "loss": 0.916, + "step": 4515 + }, + { + "epoch": 0.24, + "learning_rate": 4.920307135409797e-05, + "loss": 0.762, + "step": 4520 + }, + { + "epoch": 0.24, + "learning_rate": 4.92013163274657e-05, + "loss": 0.7822, + "step": 4525 + }, + { + "epoch": 0.24, + "learning_rate": 4.919955940183465e-05, + "loss": 0.8692, + "step": 4530 + }, + { + "epoch": 0.24, + "learning_rate": 4.9197800577342677e-05, + "loss": 0.886, + "step": 4535 + }, + { + "epoch": 0.24, + "learning_rate": 4.9196039854127805e-05, + "loss": 0.8793, + "step": 4540 + }, + { + "epoch": 0.24, + "learning_rate": 4.9194277232328176e-05, + "loss": 0.7658, + "step": 4545 + }, + { + "epoch": 0.24, + "learning_rate": 4.919251271208211e-05, + "loss": 0.7981, + "step": 4550 + }, + { + "epoch": 0.24, + "learning_rate": 4.9190746293528056e-05, + "loss": 0.8411, + "step": 4555 + }, + { + "epoch": 0.24, + "learning_rate": 4.918897797680462e-05, + "loss": 0.9487, + "step": 4560 + }, + { + "epoch": 0.24, + "learning_rate": 4.918720776205055e-05, + "loss": 0.7555, + "step": 4565 + }, + { + "epoch": 0.24, + "learning_rate": 4.9185435649404766e-05, + "loss": 0.7755, + "step": 4570 + }, + { + "epoch": 0.24, + "learning_rate": 4.91836616390063e-05, + "loss": 0.914, + "step": 4575 + }, + { + "epoch": 0.25, + "learning_rate": 4.9181885730994356e-05, + "loss": 0.7696, + "step": 4580 + }, + { + "epoch": 0.25, + "learning_rate": 4.9180107925508306e-05, + "loss": 0.8708, + "step": 4585 + }, + { + "epoch": 0.25, + "learning_rate": 4.9178328222687625e-05, + "loss": 0.7438, + "step": 4590 + }, + { + "epoch": 0.25, + "learning_rate": 4.917654662267197e-05, + "loss": 0.8309, + "step": 4595 + }, + { + "epoch": 0.25, + "learning_rate": 4.917476312560113e-05, + "loss": 0.7166, + "step": 4600 + }, + { + "epoch": 0.25, + "learning_rate": 4.917297773161506e-05, + "loss": 0.8975, + "step": 4605 + }, + { + "epoch": 0.25, + "learning_rate": 4.917119044085385e-05, + "loss": 0.7804, + "step": 4610 + }, + { + "epoch": 0.25, + "learning_rate": 4.9169401253457737e-05, + "loss": 0.97, + "step": 4615 + }, + { + "epoch": 0.25, + "learning_rate": 4.9167610169567127e-05, + "loss": 0.9211, + "step": 4620 + }, + { + "epoch": 0.25, + "learning_rate": 4.9165817189322546e-05, + "loss": 0.8738, + "step": 4625 + }, + { + "epoch": 0.25, + "learning_rate": 4.916402231286469e-05, + "loss": 0.8577, + "step": 4630 + }, + { + "epoch": 0.25, + "learning_rate": 4.9162225540334396e-05, + "loss": 0.8111, + "step": 4635 + }, + { + "epoch": 0.25, + "learning_rate": 4.9160426871872645e-05, + "loss": 0.7975, + "step": 4640 + }, + { + "epoch": 0.25, + "learning_rate": 4.9158626307620586e-05, + "loss": 0.7764, + "step": 4645 + }, + { + "epoch": 0.25, + "learning_rate": 4.91568238477195e-05, + "loss": 0.9106, + "step": 4650 + }, + { + "epoch": 0.25, + "learning_rate": 4.915501949231081e-05, + "loss": 0.8469, + "step": 4655 + }, + { + "epoch": 0.25, + "learning_rate": 4.91532132415361e-05, + "loss": 0.9166, + "step": 4660 + }, + { + "epoch": 0.25, + "learning_rate": 4.9151405095537116e-05, + "loss": 0.7444, + "step": 4665 + }, + { + "epoch": 0.25, + "learning_rate": 4.914959505445572e-05, + "loss": 0.8629, + "step": 4670 + }, + { + "epoch": 0.25, + "learning_rate": 4.9147783118433947e-05, + "loss": 0.8151, + "step": 4675 + }, + { + "epoch": 0.25, + "learning_rate": 4.9145969287613976e-05, + "loss": 0.597, + "step": 4680 + }, + { + "epoch": 0.25, + "learning_rate": 4.9144153562138126e-05, + "loss": 0.7783, + "step": 4685 + }, + { + "epoch": 0.25, + "learning_rate": 4.9142335942148873e-05, + "loss": 0.7502, + "step": 4690 + }, + { + "epoch": 0.25, + "learning_rate": 4.9140516427788845e-05, + "loss": 0.7419, + "step": 4695 + }, + { + "epoch": 0.25, + "learning_rate": 4.913869501920081e-05, + "loss": 0.8133, + "step": 4700 + }, + { + "epoch": 0.25, + "learning_rate": 4.913687171652769e-05, + "loss": 0.7746, + "step": 4705 + }, + { + "epoch": 0.25, + "learning_rate": 4.9135046519912545e-05, + "loss": 0.7946, + "step": 4710 + }, + { + "epoch": 0.25, + "learning_rate": 4.91332194294986e-05, + "loss": 0.8155, + "step": 4715 + }, + { + "epoch": 0.25, + "learning_rate": 4.9131390445429224e-05, + "loss": 0.8734, + "step": 4720 + }, + { + "epoch": 0.25, + "learning_rate": 4.9129559567847926e-05, + "loss": 0.732, + "step": 4725 + }, + { + "epoch": 0.25, + "learning_rate": 4.9127726796898367e-05, + "loss": 0.8291, + "step": 4730 + }, + { + "epoch": 0.25, + "learning_rate": 4.912589213272436e-05, + "loss": 0.6404, + "step": 4735 + }, + { + "epoch": 0.25, + "learning_rate": 4.912405557546987e-05, + "loss": 0.8591, + "step": 4740 + }, + { + "epoch": 0.25, + "learning_rate": 4.9122217125279e-05, + "loss": 0.6889, + "step": 4745 + }, + { + "epoch": 0.25, + "learning_rate": 4.912037678229602e-05, + "loss": 0.9284, + "step": 4750 + }, + { + "epoch": 0.25, + "learning_rate": 4.9118534546665314e-05, + "loss": 0.7802, + "step": 4755 + }, + { + "epoch": 0.25, + "learning_rate": 4.911669041853145e-05, + "loss": 0.9108, + "step": 4760 + }, + { + "epoch": 0.25, + "learning_rate": 4.9114844398039125e-05, + "loss": 0.7587, + "step": 4765 + }, + { + "epoch": 0.26, + "learning_rate": 4.91129964853332e-05, + "loss": 0.9941, + "step": 4770 + }, + { + "epoch": 0.26, + "learning_rate": 4.911114668055867e-05, + "loss": 0.884, + "step": 4775 + }, + { + "epoch": 0.26, + "learning_rate": 4.9109294983860675e-05, + "loss": 0.8066, + "step": 4780 + }, + { + "epoch": 0.26, + "learning_rate": 4.9107441395384526e-05, + "loss": 0.8286, + "step": 4785 + }, + { + "epoch": 0.26, + "learning_rate": 4.910558591527565e-05, + "loss": 0.7533, + "step": 4790 + }, + { + "epoch": 0.26, + "learning_rate": 4.910372854367966e-05, + "loss": 0.9376, + "step": 4795 + }, + { + "epoch": 0.26, + "learning_rate": 4.910186928074229e-05, + "loss": 0.8105, + "step": 4800 + }, + { + "epoch": 0.26, + "learning_rate": 4.910000812660942e-05, + "loss": 0.679, + "step": 4805 + }, + { + "epoch": 0.26, + "learning_rate": 4.90981450814271e-05, + "loss": 0.7108, + "step": 4810 + }, + { + "epoch": 0.26, + "learning_rate": 4.9096280145341514e-05, + "loss": 0.8678, + "step": 4815 + }, + { + "epoch": 0.26, + "learning_rate": 4.9094413318499e-05, + "loss": 0.9949, + "step": 4820 + }, + { + "epoch": 0.26, + "learning_rate": 4.909254460104604e-05, + "loss": 0.8043, + "step": 4825 + }, + { + "epoch": 0.26, + "learning_rate": 4.9090673993129265e-05, + "loss": 0.7961, + "step": 4830 + }, + { + "epoch": 0.26, + "learning_rate": 4.9088801494895465e-05, + "loss": 0.7947, + "step": 4835 + }, + { + "epoch": 0.26, + "learning_rate": 4.908692710649156e-05, + "loss": 0.7128, + "step": 4840 + }, + { + "epoch": 0.26, + "learning_rate": 4.908505082806462e-05, + "loss": 0.6772, + "step": 4845 + }, + { + "epoch": 0.26, + "learning_rate": 4.908317265976188e-05, + "loss": 0.5842, + "step": 4850 + }, + { + "epoch": 0.26, + "learning_rate": 4.9081292601730714e-05, + "loss": 0.8447, + "step": 4855 + }, + { + "epoch": 0.26, + "learning_rate": 4.907941065411864e-05, + "loss": 0.8872, + "step": 4860 + }, + { + "epoch": 0.26, + "learning_rate": 4.907752681707333e-05, + "loss": 0.6919, + "step": 4865 + }, + { + "epoch": 0.26, + "learning_rate": 4.9075641090742606e-05, + "loss": 0.7229, + "step": 4870 + }, + { + "epoch": 0.26, + "learning_rate": 4.907375347527443e-05, + "loss": 0.9401, + "step": 4875 + }, + { + "epoch": 0.26, + "learning_rate": 4.9071863970816925e-05, + "loss": 0.8628, + "step": 4880 + }, + { + "epoch": 0.26, + "learning_rate": 4.906997257751834e-05, + "loss": 0.8033, + "step": 4885 + }, + { + "epoch": 0.26, + "learning_rate": 4.90680792955271e-05, + "loss": 0.844, + "step": 4890 + }, + { + "epoch": 0.26, + "learning_rate": 4.906618412499176e-05, + "loss": 0.9655, + "step": 4895 + }, + { + "epoch": 0.26, + "learning_rate": 4.906428706606102e-05, + "loss": 0.9286, + "step": 4900 + }, + { + "epoch": 0.26, + "learning_rate": 4.906238811888375e-05, + "loss": 0.8596, + "step": 4905 + }, + { + "epoch": 0.26, + "learning_rate": 4.9060487283608945e-05, + "loss": 0.7534, + "step": 4910 + }, + { + "epoch": 0.26, + "learning_rate": 4.905858456038577e-05, + "loss": 0.7926, + "step": 4915 + }, + { + "epoch": 0.26, + "learning_rate": 4.90566799493635e-05, + "loss": 0.7473, + "step": 4920 + }, + { + "epoch": 0.26, + "learning_rate": 4.905477345069161e-05, + "loss": 0.8419, + "step": 4925 + }, + { + "epoch": 0.26, + "learning_rate": 4.905286506451968e-05, + "loss": 0.7639, + "step": 4930 + }, + { + "epoch": 0.26, + "learning_rate": 4.905095479099747e-05, + "loss": 0.8235, + "step": 4935 + }, + { + "epoch": 0.26, + "learning_rate": 4.904904263027486e-05, + "loss": 0.7668, + "step": 4940 + }, + { + "epoch": 0.26, + "learning_rate": 4.904712858250189e-05, + "loss": 0.7284, + "step": 4945 + }, + { + "epoch": 0.26, + "learning_rate": 4.904521264782875e-05, + "loss": 0.7893, + "step": 4950 + }, + { + "epoch": 0.27, + "learning_rate": 4.904329482640579e-05, + "loss": 0.8401, + "step": 4955 + }, + { + "epoch": 0.27, + "learning_rate": 4.904137511838348e-05, + "loss": 0.96, + "step": 4960 + }, + { + "epoch": 0.27, + "learning_rate": 4.903945352391247e-05, + "loss": 0.8928, + "step": 4965 + }, + { + "epoch": 0.27, + "learning_rate": 4.903753004314352e-05, + "loss": 0.8379, + "step": 4970 + }, + { + "epoch": 0.27, + "learning_rate": 4.9035604676227574e-05, + "loss": 0.8085, + "step": 4975 + }, + { + "epoch": 0.27, + "learning_rate": 4.903367742331571e-05, + "loss": 0.7874, + "step": 4980 + }, + { + "epoch": 0.27, + "learning_rate": 4.9031748284559145e-05, + "loss": 0.6945, + "step": 4985 + }, + { + "epoch": 0.27, + "learning_rate": 4.902981726010926e-05, + "loss": 0.9188, + "step": 4990 + }, + { + "epoch": 0.27, + "learning_rate": 4.902788435011757e-05, + "loss": 0.8353, + "step": 4995 + }, + { + "epoch": 0.27, + "learning_rate": 4.9025949554735736e-05, + "loss": 0.8942, + "step": 5000 + }, + { + "epoch": 0.27, + "learning_rate": 4.90240128741156e-05, + "loss": 0.6747, + "step": 5005 + }, + { + "epoch": 0.27, + "learning_rate": 4.90220743084091e-05, + "loss": 0.6895, + "step": 5010 + }, + { + "epoch": 0.27, + "learning_rate": 4.902013385776838e-05, + "loss": 0.9258, + "step": 5015 + }, + { + "epoch": 0.27, + "learning_rate": 4.901819152234567e-05, + "loss": 0.678, + "step": 5020 + }, + { + "epoch": 0.27, + "learning_rate": 4.9016247302293396e-05, + "loss": 0.8835, + "step": 5025 + }, + { + "epoch": 0.27, + "learning_rate": 4.90143011977641e-05, + "loss": 0.934, + "step": 5030 + }, + { + "epoch": 0.27, + "learning_rate": 4.901235320891051e-05, + "loss": 0.7437, + "step": 5035 + }, + { + "epoch": 0.27, + "learning_rate": 4.901040333588545e-05, + "loss": 0.9084, + "step": 5040 + }, + { + "epoch": 0.27, + "learning_rate": 4.900845157884195e-05, + "loss": 1.0283, + "step": 5045 + }, + { + "epoch": 0.27, + "learning_rate": 4.900649793793313e-05, + "loss": 1.0257, + "step": 5050 + }, + { + "epoch": 0.27, + "learning_rate": 4.9004542413312305e-05, + "loss": 0.8054, + "step": 5055 + }, + { + "epoch": 0.27, + "learning_rate": 4.9002585005132914e-05, + "loss": 0.7625, + "step": 5060 + }, + { + "epoch": 0.27, + "learning_rate": 4.9000625713548545e-05, + "loss": 0.7329, + "step": 5065 + }, + { + "epoch": 0.27, + "learning_rate": 4.899866453871294e-05, + "loss": 0.7228, + "step": 5070 + }, + { + "epoch": 0.27, + "learning_rate": 4.899670148077998e-05, + "loss": 0.8376, + "step": 5075 + }, + { + "epoch": 0.27, + "learning_rate": 4.899473653990371e-05, + "loss": 0.9449, + "step": 5080 + }, + { + "epoch": 0.27, + "learning_rate": 4.899276971623831e-05, + "loss": 0.9834, + "step": 5085 + }, + { + "epoch": 0.27, + "learning_rate": 4.8990801009938103e-05, + "loss": 0.8887, + "step": 5090 + }, + { + "epoch": 0.27, + "learning_rate": 4.898883042115758e-05, + "loss": 0.8239, + "step": 5095 + }, + { + "epoch": 0.27, + "learning_rate": 4.898685795005135e-05, + "loss": 0.7858, + "step": 5100 + }, + { + "epoch": 0.27, + "learning_rate": 4.89848835967742e-05, + "loss": 0.9186, + "step": 5105 + }, + { + "epoch": 0.27, + "learning_rate": 4.898290736148105e-05, + "loss": 0.7272, + "step": 5110 + }, + { + "epoch": 0.27, + "learning_rate": 4.898092924432695e-05, + "loss": 0.606, + "step": 5115 + }, + { + "epoch": 0.27, + "learning_rate": 4.897894924546714e-05, + "loss": 0.7839, + "step": 5120 + }, + { + "epoch": 0.27, + "learning_rate": 4.897696736505698e-05, + "loss": 0.7314, + "step": 5125 + }, + { + "epoch": 0.27, + "learning_rate": 4.8974983603251964e-05, + "loss": 0.9404, + "step": 5130 + }, + { + "epoch": 0.27, + "learning_rate": 4.8972997960207776e-05, + "loss": 0.8616, + "step": 5135 + }, + { + "epoch": 0.27, + "learning_rate": 4.897101043608021e-05, + "loss": 0.7068, + "step": 5140 + }, + { + "epoch": 0.28, + "learning_rate": 4.896902103102522e-05, + "loss": 0.79, + "step": 5145 + }, + { + "epoch": 0.28, + "learning_rate": 4.896702974519891e-05, + "loss": 0.8714, + "step": 5150 + }, + { + "epoch": 0.28, + "learning_rate": 4.8965036578757536e-05, + "loss": 0.7654, + "step": 5155 + }, + { + "epoch": 0.28, + "learning_rate": 4.896304153185748e-05, + "loss": 0.8567, + "step": 5160 + }, + { + "epoch": 0.28, + "learning_rate": 4.8961044604655304e-05, + "loss": 0.8752, + "step": 5165 + }, + { + "epoch": 0.28, + "learning_rate": 4.895904579730769e-05, + "loss": 0.7139, + "step": 5170 + }, + { + "epoch": 0.28, + "learning_rate": 4.8957045109971476e-05, + "loss": 0.936, + "step": 5175 + }, + { + "epoch": 0.28, + "learning_rate": 4.895504254280366e-05, + "loss": 0.6209, + "step": 5180 + }, + { + "epoch": 0.28, + "learning_rate": 4.8953038095961366e-05, + "loss": 0.9938, + "step": 5185 + }, + { + "epoch": 0.28, + "learning_rate": 4.895103176960188e-05, + "loss": 0.6716, + "step": 5190 + }, + { + "epoch": 0.28, + "learning_rate": 4.894902356388263e-05, + "loss": 0.9064, + "step": 5195 + }, + { + "epoch": 0.28, + "learning_rate": 4.894701347896121e-05, + "loss": 0.7788, + "step": 5200 + }, + { + "epoch": 0.28, + "learning_rate": 4.894500151499532e-05, + "loss": 0.7092, + "step": 5205 + }, + { + "epoch": 0.28, + "learning_rate": 4.894298767214285e-05, + "loss": 0.8147, + "step": 5210 + }, + { + "epoch": 0.28, + "learning_rate": 4.894097195056181e-05, + "loss": 0.9334, + "step": 5215 + }, + { + "epoch": 0.28, + "learning_rate": 4.893895435041037e-05, + "loss": 0.7708, + "step": 5220 + }, + { + "epoch": 0.28, + "learning_rate": 4.8936934871846854e-05, + "loss": 0.8733, + "step": 5225 + }, + { + "epoch": 0.28, + "learning_rate": 4.8934913515029703e-05, + "loss": 0.8391, + "step": 5230 + }, + { + "epoch": 0.28, + "learning_rate": 4.893289028011755e-05, + "loss": 0.7536, + "step": 5235 + }, + { + "epoch": 0.28, + "learning_rate": 4.8930865167269126e-05, + "loss": 0.8303, + "step": 5240 + }, + { + "epoch": 0.28, + "learning_rate": 4.8928838176643357e-05, + "loss": 0.8649, + "step": 5245 + }, + { + "epoch": 0.28, + "learning_rate": 4.892680930839929e-05, + "loss": 0.8889, + "step": 5250 + }, + { + "epoch": 0.28, + "learning_rate": 4.892477856269611e-05, + "loss": 0.9678, + "step": 5255 + }, + { + "epoch": 0.28, + "learning_rate": 4.892274593969318e-05, + "loss": 0.7796, + "step": 5260 + }, + { + "epoch": 0.28, + "learning_rate": 4.8920711439549985e-05, + "loss": 0.6292, + "step": 5265 + }, + { + "epoch": 0.28, + "learning_rate": 4.891867506242617e-05, + "loss": 0.9088, + "step": 5270 + }, + { + "epoch": 0.28, + "learning_rate": 4.8916636808481514e-05, + "loss": 0.8349, + "step": 5275 + }, + { + "epoch": 0.28, + "learning_rate": 4.8914596677875965e-05, + "loss": 0.6519, + "step": 5280 + }, + { + "epoch": 0.28, + "learning_rate": 4.8912554670769596e-05, + "loss": 0.8597, + "step": 5285 + }, + { + "epoch": 0.28, + "learning_rate": 4.891051078732263e-05, + "loss": 0.7662, + "step": 5290 + }, + { + "epoch": 0.28, + "learning_rate": 4.890846502769547e-05, + "loss": 0.789, + "step": 5295 + }, + { + "epoch": 0.28, + "learning_rate": 4.8906417392048606e-05, + "loss": 0.8971, + "step": 5300 + }, + { + "epoch": 0.28, + "learning_rate": 4.890436788054274e-05, + "loss": 0.8498, + "step": 5305 + }, + { + "epoch": 0.28, + "learning_rate": 4.890231649333867e-05, + "loss": 0.854, + "step": 5310 + }, + { + "epoch": 0.28, + "learning_rate": 4.8900263230597377e-05, + "loss": 0.9026, + "step": 5315 + }, + { + "epoch": 0.28, + "learning_rate": 4.889820809247996e-05, + "loss": 0.7707, + "step": 5320 + }, + { + "epoch": 0.28, + "learning_rate": 4.889615107914768e-05, + "loss": 0.8193, + "step": 5325 + }, + { + "epoch": 0.29, + "learning_rate": 4.889409219076195e-05, + "loss": 0.8653, + "step": 5330 + }, + { + "epoch": 0.29, + "learning_rate": 4.889203142748433e-05, + "loss": 0.848, + "step": 5335 + }, + { + "epoch": 0.29, + "learning_rate": 4.888996878947652e-05, + "loss": 0.8134, + "step": 5340 + }, + { + "epoch": 0.29, + "learning_rate": 4.888790427690035e-05, + "loss": 0.6155, + "step": 5345 + }, + { + "epoch": 0.29, + "learning_rate": 4.888583788991783e-05, + "loss": 0.8795, + "step": 5350 + }, + { + "epoch": 0.29, + "learning_rate": 4.8883769628691106e-05, + "loss": 0.8331, + "step": 5355 + }, + { + "epoch": 0.29, + "learning_rate": 4.888169949338246e-05, + "loss": 0.7025, + "step": 5360 + }, + { + "epoch": 0.29, + "learning_rate": 4.8879627484154335e-05, + "loss": 0.8499, + "step": 5365 + }, + { + "epoch": 0.29, + "learning_rate": 4.8877553601169314e-05, + "loss": 0.7206, + "step": 5370 + }, + { + "epoch": 0.29, + "learning_rate": 4.8875477844590125e-05, + "loss": 1.0382, + "step": 5375 + }, + { + "epoch": 0.29, + "learning_rate": 4.8873400214579634e-05, + "loss": 0.7875, + "step": 5380 + }, + { + "epoch": 0.29, + "learning_rate": 4.8871320711300893e-05, + "loss": 0.7156, + "step": 5385 + }, + { + "epoch": 0.29, + "learning_rate": 4.8869239334917054e-05, + "loss": 0.9238, + "step": 5390 + }, + { + "epoch": 0.29, + "learning_rate": 4.8867156085591444e-05, + "loss": 0.6847, + "step": 5395 + }, + { + "epoch": 0.29, + "learning_rate": 4.886507096348752e-05, + "loss": 0.6943, + "step": 5400 + }, + { + "epoch": 0.29, + "learning_rate": 4.8862983968768905e-05, + "loss": 0.8144, + "step": 5405 + }, + { + "epoch": 0.29, + "learning_rate": 4.886089510159936e-05, + "loss": 0.8644, + "step": 5410 + }, + { + "epoch": 0.29, + "learning_rate": 4.8858804362142775e-05, + "loss": 0.8313, + "step": 5415 + }, + { + "epoch": 0.29, + "learning_rate": 4.885671175056322e-05, + "loss": 0.6607, + "step": 5420 + }, + { + "epoch": 0.29, + "learning_rate": 4.8854617267024885e-05, + "loss": 0.8752, + "step": 5425 + }, + { + "epoch": 0.29, + "learning_rate": 4.885252091169214e-05, + "loss": 0.7901, + "step": 5430 + }, + { + "epoch": 0.29, + "learning_rate": 4.8850422684729436e-05, + "loss": 0.9387, + "step": 5435 + }, + { + "epoch": 0.29, + "learning_rate": 4.884832258630145e-05, + "loss": 0.7988, + "step": 5440 + }, + { + "epoch": 0.29, + "learning_rate": 4.884622061657297e-05, + "loss": 0.7975, + "step": 5445 + }, + { + "epoch": 0.29, + "learning_rate": 4.884411677570891e-05, + "loss": 0.9318, + "step": 5450 + }, + { + "epoch": 0.29, + "learning_rate": 4.884201106387436e-05, + "loss": 0.8113, + "step": 5455 + }, + { + "epoch": 0.29, + "learning_rate": 4.883990348123456e-05, + "loss": 0.8999, + "step": 5460 + }, + { + "epoch": 0.29, + "learning_rate": 4.8837794027954864e-05, + "loss": 0.8144, + "step": 5465 + }, + { + "epoch": 0.29, + "learning_rate": 4.883568270420081e-05, + "loss": 0.6924, + "step": 5470 + }, + { + "epoch": 0.29, + "learning_rate": 4.883356951013807e-05, + "loss": 0.7643, + "step": 5475 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831454445932444e-05, + "loss": 0.8673, + "step": 5480 + }, + { + "epoch": 0.29, + "learning_rate": 4.88293375117499e-05, + "loss": 0.7358, + "step": 5485 + }, + { + "epoch": 0.29, + "learning_rate": 4.882721870775655e-05, + "loss": 0.8188, + "step": 5490 + }, + { + "epoch": 0.29, + "learning_rate": 4.882509803411865e-05, + "loss": 0.7592, + "step": 5495 + }, + { + "epoch": 0.29, + "learning_rate": 4.8822975491002606e-05, + "loss": 0.8642, + "step": 5500 + }, + { + "epoch": 0.29, + "learning_rate": 4.8820851078574955e-05, + "loss": 0.8981, + "step": 5505 + }, + { + "epoch": 0.29, + "learning_rate": 4.8818724797002404e-05, + "loss": 0.7225, + "step": 5510 + }, + { + "epoch": 0.3, + "learning_rate": 4.881659664645178e-05, + "loss": 0.7061, + "step": 5515 + }, + { + "epoch": 0.3, + "learning_rate": 4.8814466627090094e-05, + "loss": 0.7144, + "step": 5520 + }, + { + "epoch": 0.3, + "learning_rate": 4.881233473908447e-05, + "loss": 0.774, + "step": 5525 + }, + { + "epoch": 0.3, + "learning_rate": 4.8810200982602186e-05, + "loss": 0.9191, + "step": 5530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880806535781069e-05, + "loss": 0.7728, + "step": 5535 + }, + { + "epoch": 0.3, + "learning_rate": 4.8805927864877526e-05, + "loss": 0.7725, + "step": 5540 + }, + { + "epoch": 0.3, + "learning_rate": 4.880378850397044e-05, + "loss": 0.8261, + "step": 5545 + }, + { + "epoch": 0.3, + "learning_rate": 4.88016472752573e-05, + "loss": 0.7638, + "step": 5550 + }, + { + "epoch": 0.3, + "learning_rate": 4.879950417890611e-05, + "loss": 0.8227, + "step": 5555 + }, + { + "epoch": 0.3, + "learning_rate": 4.879735921508504e-05, + "loss": 0.9262, + "step": 5560 + }, + { + "epoch": 0.3, + "learning_rate": 4.8795212383962396e-05, + "loss": 0.8089, + "step": 5565 + }, + { + "epoch": 0.3, + "learning_rate": 4.879306368570663e-05, + "loss": 0.8948, + "step": 5570 + }, + { + "epoch": 0.3, + "learning_rate": 4.879091312048635e-05, + "loss": 0.7509, + "step": 5575 + }, + { + "epoch": 0.3, + "learning_rate": 4.878876068847029e-05, + "loss": 0.8049, + "step": 5580 + }, + { + "epoch": 0.3, + "learning_rate": 4.8786606389827366e-05, + "loss": 0.694, + "step": 5585 + }, + { + "epoch": 0.3, + "learning_rate": 4.8784450224726606e-05, + "loss": 0.7351, + "step": 5590 + }, + { + "epoch": 0.3, + "learning_rate": 4.8782292193337195e-05, + "loss": 0.8148, + "step": 5595 + }, + { + "epoch": 0.3, + "learning_rate": 4.878013229582847e-05, + "loss": 0.7501, + "step": 5600 + }, + { + "epoch": 0.3, + "learning_rate": 4.8777970532369915e-05, + "loss": 0.7474, + "step": 5605 + }, + { + "epoch": 0.3, + "learning_rate": 4.877580690313115e-05, + "loss": 0.9247, + "step": 5610 + }, + { + "epoch": 0.3, + "learning_rate": 4.877364140828196e-05, + "loss": 0.8913, + "step": 5615 + }, + { + "epoch": 0.3, + "learning_rate": 4.877147404799224e-05, + "loss": 0.8387, + "step": 5620 + }, + { + "epoch": 0.3, + "learning_rate": 4.876930482243208e-05, + "loss": 0.7817, + "step": 5625 + }, + { + "epoch": 0.3, + "learning_rate": 4.876713373177168e-05, + "loss": 0.7441, + "step": 5630 + }, + { + "epoch": 0.3, + "learning_rate": 4.87649607761814e-05, + "loss": 0.806, + "step": 5635 + }, + { + "epoch": 0.3, + "learning_rate": 4.876278595583176e-05, + "loss": 0.6691, + "step": 5640 + }, + { + "epoch": 0.3, + "learning_rate": 4.876060927089338e-05, + "loss": 0.918, + "step": 5645 + }, + { + "epoch": 0.3, + "learning_rate": 4.875843072153709e-05, + "loss": 0.9452, + "step": 5650 + }, + { + "epoch": 0.3, + "learning_rate": 4.87562503079338e-05, + "loss": 0.7853, + "step": 5655 + }, + { + "epoch": 0.3, + "learning_rate": 4.875406803025463e-05, + "loss": 0.7425, + "step": 5660 + }, + { + "epoch": 0.3, + "learning_rate": 4.875188388867081e-05, + "loss": 0.8371, + "step": 5665 + }, + { + "epoch": 0.3, + "learning_rate": 4.87496978833537e-05, + "loss": 0.7431, + "step": 5670 + }, + { + "epoch": 0.3, + "learning_rate": 4.874751001447486e-05, + "loss": 0.8835, + "step": 5675 + }, + { + "epoch": 0.3, + "learning_rate": 4.874532028220595e-05, + "loss": 0.8855, + "step": 5680 + }, + { + "epoch": 0.3, + "learning_rate": 4.8743128686718797e-05, + "loss": 0.8672, + "step": 5685 + }, + { + "epoch": 0.3, + "learning_rate": 4.874093522818536e-05, + "loss": 0.9386, + "step": 5690 + }, + { + "epoch": 0.3, + "learning_rate": 4.873873990677775e-05, + "loss": 0.9365, + "step": 5695 + }, + { + "epoch": 0.3, + "learning_rate": 4.8736542722668234e-05, + "loss": 0.8297, + "step": 5700 + }, + { + "epoch": 0.31, + "learning_rate": 4.8734343676029216e-05, + "loss": 0.7469, + "step": 5705 + }, + { + "epoch": 0.31, + "learning_rate": 4.873214276703325e-05, + "loss": 0.9627, + "step": 5710 + }, + { + "epoch": 0.31, + "learning_rate": 4.8729939995853034e-05, + "loss": 0.7226, + "step": 5715 + }, + { + "epoch": 0.31, + "learning_rate": 4.872773536266142e-05, + "loss": 0.8238, + "step": 5720 + }, + { + "epoch": 0.31, + "learning_rate": 4.8725528867631375e-05, + "loss": 0.8192, + "step": 5725 + }, + { + "epoch": 0.31, + "learning_rate": 4.8723320510936064e-05, + "loss": 0.8981, + "step": 5730 + }, + { + "epoch": 0.31, + "learning_rate": 4.872111029274875e-05, + "loss": 0.7524, + "step": 5735 + }, + { + "epoch": 0.31, + "learning_rate": 4.871889821324286e-05, + "loss": 0.6757, + "step": 5740 + }, + { + "epoch": 0.31, + "learning_rate": 4.8716684272591984e-05, + "loss": 0.9226, + "step": 5745 + }, + { + "epoch": 0.31, + "learning_rate": 4.8714468470969836e-05, + "loss": 0.9441, + "step": 5750 + }, + { + "epoch": 0.31, + "learning_rate": 4.871225080855028e-05, + "loss": 0.8892, + "step": 5755 + }, + { + "epoch": 0.31, + "learning_rate": 4.8710031285507326e-05, + "loss": 0.8726, + "step": 5760 + }, + { + "epoch": 0.31, + "learning_rate": 4.870780990201514e-05, + "loss": 0.6471, + "step": 5765 + }, + { + "epoch": 0.31, + "learning_rate": 4.870558665824803e-05, + "loss": 0.9857, + "step": 5770 + }, + { + "epoch": 0.31, + "learning_rate": 4.870336155438044e-05, + "loss": 0.8579, + "step": 5775 + }, + { + "epoch": 0.31, + "learning_rate": 4.870113459058696e-05, + "loss": 0.8928, + "step": 5780 + }, + { + "epoch": 0.31, + "learning_rate": 4.869890576704234e-05, + "loss": 0.6858, + "step": 5785 + }, + { + "epoch": 0.31, + "learning_rate": 4.869667508392148e-05, + "loss": 0.7689, + "step": 5790 + }, + { + "epoch": 0.31, + "learning_rate": 4.86944425413994e-05, + "loss": 0.9229, + "step": 5795 + }, + { + "epoch": 0.31, + "learning_rate": 4.8692208139651274e-05, + "loss": 0.7077, + "step": 5800 + }, + { + "epoch": 0.31, + "learning_rate": 4.868997187885244e-05, + "loss": 0.9604, + "step": 5805 + }, + { + "epoch": 0.31, + "learning_rate": 4.868773375917837e-05, + "loss": 0.8189, + "step": 5810 + }, + { + "epoch": 0.31, + "learning_rate": 4.868549378080468e-05, + "loss": 0.6778, + "step": 5815 + }, + { + "epoch": 0.31, + "learning_rate": 4.868325194390714e-05, + "loss": 0.8863, + "step": 5820 + }, + { + "epoch": 0.31, + "learning_rate": 4.8681008248661645e-05, + "loss": 0.7959, + "step": 5825 + }, + { + "epoch": 0.31, + "learning_rate": 4.8678762695244265e-05, + "loss": 0.7375, + "step": 5830 + }, + { + "epoch": 0.31, + "learning_rate": 4.8676515283831195e-05, + "loss": 0.7799, + "step": 5835 + }, + { + "epoch": 0.31, + "learning_rate": 4.867426601459877e-05, + "loss": 0.8646, + "step": 5840 + }, + { + "epoch": 0.31, + "learning_rate": 4.8672014887723505e-05, + "loss": 0.7989, + "step": 5845 + }, + { + "epoch": 0.31, + "learning_rate": 4.866976190338203e-05, + "loss": 0.7567, + "step": 5850 + }, + { + "epoch": 0.31, + "learning_rate": 4.866750706175113e-05, + "loss": 0.8682, + "step": 5855 + }, + { + "epoch": 0.31, + "learning_rate": 4.8665250363007726e-05, + "loss": 0.9152, + "step": 5860 + }, + { + "epoch": 0.31, + "learning_rate": 4.866299180732891e-05, + "loss": 0.8303, + "step": 5865 + }, + { + "epoch": 0.31, + "learning_rate": 4.866073139489188e-05, + "loss": 0.8081, + "step": 5870 + }, + { + "epoch": 0.31, + "learning_rate": 4.8658469125874026e-05, + "loss": 0.7795, + "step": 5875 + }, + { + "epoch": 0.31, + "learning_rate": 4.8656205000452855e-05, + "loss": 0.9127, + "step": 5880 + }, + { + "epoch": 0.31, + "learning_rate": 4.8653939018806014e-05, + "loss": 0.9329, + "step": 5885 + }, + { + "epoch": 0.32, + "learning_rate": 4.8651671181111325e-05, + "loss": 0.7977, + "step": 5890 + }, + { + "epoch": 0.32, + "learning_rate": 4.864940148754673e-05, + "loss": 0.8458, + "step": 5895 + }, + { + "epoch": 0.32, + "learning_rate": 4.864712993829033e-05, + "loss": 0.6276, + "step": 5900 + }, + { + "epoch": 0.32, + "learning_rate": 4.864485653352034e-05, + "loss": 0.8415, + "step": 5905 + }, + { + "epoch": 0.32, + "learning_rate": 4.8642581273415183e-05, + "loss": 0.6388, + "step": 5910 + }, + { + "epoch": 0.32, + "learning_rate": 4.864030415815337e-05, + "loss": 0.9421, + "step": 5915 + }, + { + "epoch": 0.32, + "learning_rate": 4.863802518791358e-05, + "loss": 0.9388, + "step": 5920 + }, + { + "epoch": 0.32, + "learning_rate": 4.863574436287465e-05, + "loss": 0.6694, + "step": 5925 + }, + { + "epoch": 0.32, + "learning_rate": 4.863346168321553e-05, + "loss": 0.9516, + "step": 5930 + }, + { + "epoch": 0.32, + "learning_rate": 4.8631177149115346e-05, + "loss": 0.9281, + "step": 5935 + }, + { + "epoch": 0.32, + "learning_rate": 4.862889076075336e-05, + "loss": 0.8082, + "step": 5940 + }, + { + "epoch": 0.32, + "learning_rate": 4.862660251830897e-05, + "loss": 0.8666, + "step": 5945 + }, + { + "epoch": 0.32, + "learning_rate": 4.862431242196173e-05, + "loss": 0.8533, + "step": 5950 + }, + { + "epoch": 0.32, + "learning_rate": 4.8622020471891336e-05, + "loss": 0.9231, + "step": 5955 + }, + { + "epoch": 0.32, + "learning_rate": 4.861972666827763e-05, + "loss": 0.6603, + "step": 5960 + }, + { + "epoch": 0.32, + "learning_rate": 4.8617431011300596e-05, + "loss": 0.7848, + "step": 5965 + }, + { + "epoch": 0.32, + "learning_rate": 4.8615133501140374e-05, + "loss": 0.8393, + "step": 5970 + }, + { + "epoch": 0.32, + "learning_rate": 4.861283413797724e-05, + "loss": 0.9132, + "step": 5975 + }, + { + "epoch": 0.32, + "learning_rate": 4.8610532921991615e-05, + "loss": 0.8903, + "step": 5980 + }, + { + "epoch": 0.32, + "learning_rate": 4.860822985336407e-05, + "loss": 0.8436, + "step": 5985 + }, + { + "epoch": 0.32, + "learning_rate": 4.8605924932275315e-05, + "loss": 0.8425, + "step": 5990 + }, + { + "epoch": 0.32, + "learning_rate": 4.860361815890622e-05, + "loss": 0.8307, + "step": 5995 + }, + { + "epoch": 0.32, + "learning_rate": 4.860130953343778e-05, + "loss": 0.8491, + "step": 6000 + }, + { + "epoch": 0.32, + "learning_rate": 4.859899905605114e-05, + "loss": 0.7904, + "step": 6005 + }, + { + "epoch": 0.32, + "learning_rate": 4.8596686726927616e-05, + "loss": 0.8774, + "step": 6010 + }, + { + "epoch": 0.32, + "learning_rate": 4.8594372546248634e-05, + "loss": 0.7607, + "step": 6015 + }, + { + "epoch": 0.32, + "learning_rate": 4.859205651419578e-05, + "loss": 0.8349, + "step": 6020 + }, + { + "epoch": 0.32, + "learning_rate": 4.858973863095079e-05, + "loss": 0.7619, + "step": 6025 + }, + { + "epoch": 0.32, + "learning_rate": 4.858741889669554e-05, + "loss": 0.7539, + "step": 6030 + }, + { + "epoch": 0.32, + "learning_rate": 4.8585097311612046e-05, + "loss": 0.8129, + "step": 6035 + }, + { + "epoch": 0.32, + "learning_rate": 4.8582773875882494e-05, + "loss": 0.8613, + "step": 6040 + }, + { + "epoch": 0.32, + "learning_rate": 4.858044858968918e-05, + "loss": 0.6603, + "step": 6045 + }, + { + "epoch": 0.32, + "learning_rate": 4.857812145321455e-05, + "loss": 1.1471, + "step": 6050 + }, + { + "epoch": 0.32, + "learning_rate": 4.857579246664123e-05, + "loss": 0.9696, + "step": 6055 + }, + { + "epoch": 0.32, + "learning_rate": 4.8573461630151975e-05, + "loss": 0.8304, + "step": 6060 + }, + { + "epoch": 0.32, + "learning_rate": 4.8571128943929644e-05, + "loss": 0.9174, + "step": 6065 + }, + { + "epoch": 0.32, + "learning_rate": 4.8568794408157305e-05, + "loss": 0.7374, + "step": 6070 + }, + { + "epoch": 0.32, + "learning_rate": 4.856645802301812e-05, + "loss": 0.7429, + "step": 6075 + }, + { + "epoch": 0.33, + "learning_rate": 4.856411978869544e-05, + "loss": 0.654, + "step": 6080 + }, + { + "epoch": 0.33, + "learning_rate": 4.856177970537272e-05, + "loss": 0.7194, + "step": 6085 + }, + { + "epoch": 0.33, + "learning_rate": 4.85594377732336e-05, + "loss": 0.8306, + "step": 6090 + }, + { + "epoch": 0.33, + "learning_rate": 4.855709399246181e-05, + "loss": 0.7886, + "step": 6095 + }, + { + "epoch": 0.33, + "learning_rate": 4.8554748363241286e-05, + "loss": 0.8086, + "step": 6100 + }, + { + "epoch": 0.33, + "learning_rate": 4.8552400885756074e-05, + "loss": 0.5731, + "step": 6105 + }, + { + "epoch": 0.33, + "learning_rate": 4.8550051560190377e-05, + "loss": 0.8254, + "step": 6110 + }, + { + "epoch": 0.33, + "learning_rate": 4.8547700386728534e-05, + "loss": 0.7931, + "step": 6115 + }, + { + "epoch": 0.33, + "learning_rate": 4.854534736555504e-05, + "loss": 0.8459, + "step": 6120 + }, + { + "epoch": 0.33, + "learning_rate": 4.854299249685451e-05, + "loss": 0.825, + "step": 6125 + }, + { + "epoch": 0.33, + "learning_rate": 4.854063578081175e-05, + "loss": 0.7507, + "step": 6130 + }, + { + "epoch": 0.33, + "learning_rate": 4.853827721761167e-05, + "loss": 0.7269, + "step": 6135 + }, + { + "epoch": 0.33, + "learning_rate": 4.853591680743933e-05, + "loss": 0.8861, + "step": 6140 + }, + { + "epoch": 0.33, + "learning_rate": 4.853355455047996e-05, + "loss": 0.9874, + "step": 6145 + }, + { + "epoch": 0.33, + "learning_rate": 4.8531190446918916e-05, + "loss": 0.7483, + "step": 6150 + }, + { + "epoch": 0.33, + "learning_rate": 4.8528824496941695e-05, + "loss": 0.8492, + "step": 6155 + }, + { + "epoch": 0.33, + "learning_rate": 4.852645670073395e-05, + "loss": 0.7936, + "step": 6160 + }, + { + "epoch": 0.33, + "learning_rate": 4.852408705848146e-05, + "loss": 0.8202, + "step": 6165 + }, + { + "epoch": 0.33, + "learning_rate": 4.852171557037019e-05, + "loss": 0.7985, + "step": 6170 + }, + { + "epoch": 0.33, + "learning_rate": 4.85193422365862e-05, + "loss": 0.8009, + "step": 6175 + }, + { + "epoch": 0.33, + "learning_rate": 4.851696705731573e-05, + "loss": 0.8705, + "step": 6180 + }, + { + "epoch": 0.33, + "learning_rate": 4.851459003274515e-05, + "loss": 0.8157, + "step": 6185 + }, + { + "epoch": 0.33, + "learning_rate": 4.8512211163060974e-05, + "loss": 0.8059, + "step": 6190 + }, + { + "epoch": 0.33, + "learning_rate": 4.850983044844987e-05, + "loss": 0.9143, + "step": 6195 + }, + { + "epoch": 0.33, + "learning_rate": 4.8507447889098634e-05, + "loss": 0.9241, + "step": 6200 + }, + { + "epoch": 0.33, + "learning_rate": 4.850506348519424e-05, + "loss": 0.7165, + "step": 6205 + }, + { + "epoch": 0.33, + "learning_rate": 4.850267723692376e-05, + "loss": 0.8503, + "step": 6210 + }, + { + "epoch": 0.33, + "learning_rate": 4.8500289144474436e-05, + "loss": 0.7503, + "step": 6215 + }, + { + "epoch": 0.33, + "learning_rate": 4.849789920803367e-05, + "loss": 0.959, + "step": 6220 + }, + { + "epoch": 0.33, + "learning_rate": 4.849550742778899e-05, + "loss": 0.8166, + "step": 6225 + }, + { + "epoch": 0.33, + "learning_rate": 4.849311380392806e-05, + "loss": 0.7547, + "step": 6230 + }, + { + "epoch": 0.33, + "learning_rate": 4.8490718336638705e-05, + "loss": 0.7028, + "step": 6235 + }, + { + "epoch": 0.33, + "learning_rate": 4.84883210261089e-05, + "loss": 0.9626, + "step": 6240 + }, + { + "epoch": 0.33, + "learning_rate": 4.848592187252674e-05, + "loss": 0.7794, + "step": 6245 + }, + { + "epoch": 0.33, + "learning_rate": 4.8483520876080484e-05, + "loss": 0.9036, + "step": 6250 + }, + { + "epoch": 0.33, + "learning_rate": 4.848111803695853e-05, + "loss": 0.8925, + "step": 6255 + }, + { + "epoch": 0.33, + "learning_rate": 4.8478713355349426e-05, + "loss": 0.8219, + "step": 6260 + }, + { + "epoch": 0.34, + "learning_rate": 4.847630683144185e-05, + "loss": 0.7414, + "step": 6265 + }, + { + "epoch": 0.34, + "learning_rate": 4.847389846542464e-05, + "loss": 0.8704, + "step": 6270 + }, + { + "epoch": 0.34, + "learning_rate": 4.8471488257486766e-05, + "loss": 0.7168, + "step": 6275 + }, + { + "epoch": 0.34, + "learning_rate": 4.846907620781736e-05, + "loss": 0.854, + "step": 6280 + }, + { + "epoch": 0.34, + "learning_rate": 4.846666231660568e-05, + "loss": 0.8836, + "step": 6285 + }, + { + "epoch": 0.34, + "learning_rate": 4.8464246584041145e-05, + "loss": 0.9883, + "step": 6290 + }, + { + "epoch": 0.34, + "learning_rate": 4.84618290103133e-05, + "loss": 0.752, + "step": 6295 + }, + { + "epoch": 0.34, + "learning_rate": 4.845940959561185e-05, + "loss": 0.7937, + "step": 6300 + }, + { + "epoch": 0.34, + "learning_rate": 4.845698834012663e-05, + "loss": 1.0104, + "step": 6305 + }, + { + "epoch": 0.34, + "learning_rate": 4.8454565244047634e-05, + "loss": 0.7503, + "step": 6310 + }, + { + "epoch": 0.34, + "learning_rate": 4.8452140307565e-05, + "loss": 0.7292, + "step": 6315 + }, + { + "epoch": 0.34, + "learning_rate": 4.8449713530869e-05, + "loss": 0.9591, + "step": 6320 + }, + { + "epoch": 0.34, + "learning_rate": 4.844728491415005e-05, + "loss": 0.7253, + "step": 6325 + }, + { + "epoch": 0.34, + "learning_rate": 4.844485445759872e-05, + "loss": 0.6955, + "step": 6330 + }, + { + "epoch": 0.34, + "learning_rate": 4.844242216140573e-05, + "loss": 0.8349, + "step": 6335 + }, + { + "epoch": 0.34, + "learning_rate": 4.8439988025761915e-05, + "loss": 0.7472, + "step": 6340 + }, + { + "epoch": 0.34, + "learning_rate": 4.843755205085829e-05, + "loss": 0.8337, + "step": 6345 + }, + { + "epoch": 0.34, + "learning_rate": 4.843511423688599e-05, + "loss": 0.768, + "step": 6350 + }, + { + "epoch": 0.34, + "learning_rate": 4.843267458403631e-05, + "loss": 0.7714, + "step": 6355 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430233092500666e-05, + "loss": 0.7844, + "step": 6360 + }, + { + "epoch": 0.34, + "learning_rate": 4.8427789762470654e-05, + "loss": 0.9785, + "step": 6365 + }, + { + "epoch": 0.34, + "learning_rate": 4.842534459413798e-05, + "loss": 0.8883, + "step": 6370 + }, + { + "epoch": 0.34, + "learning_rate": 4.8422897587694503e-05, + "loss": 0.7626, + "step": 6375 + }, + { + "epoch": 0.34, + "learning_rate": 4.842044874333225e-05, + "loss": 0.726, + "step": 6380 + }, + { + "epoch": 0.34, + "learning_rate": 4.8417998061243366e-05, + "loss": 0.6785, + "step": 6385 + }, + { + "epoch": 0.34, + "learning_rate": 4.841554554162014e-05, + "loss": 0.8227, + "step": 6390 + }, + { + "epoch": 0.34, + "learning_rate": 4.841309118465503e-05, + "loss": 0.8717, + "step": 6395 + }, + { + "epoch": 0.34, + "learning_rate": 4.84106349905406e-05, + "loss": 0.551, + "step": 6400 + }, + { + "epoch": 0.34, + "learning_rate": 4.84081769594696e-05, + "loss": 0.8788, + "step": 6405 + }, + { + "epoch": 0.34, + "learning_rate": 4.8405717091634894e-05, + "loss": 0.64, + "step": 6410 + }, + { + "epoch": 0.34, + "learning_rate": 4.8403255387229496e-05, + "loss": 0.8521, + "step": 6415 + }, + { + "epoch": 0.34, + "learning_rate": 4.840079184644658e-05, + "loss": 0.8083, + "step": 6420 + }, + { + "epoch": 0.34, + "learning_rate": 4.839832646947944e-05, + "loss": 0.707, + "step": 6425 + }, + { + "epoch": 0.34, + "learning_rate": 4.839585925652153e-05, + "loss": 0.7592, + "step": 6430 + }, + { + "epoch": 0.34, + "learning_rate": 4.839339020776645e-05, + "loss": 0.8081, + "step": 6435 + }, + { + "epoch": 0.34, + "learning_rate": 4.839091932340793e-05, + "loss": 0.7157, + "step": 6440 + }, + { + "epoch": 0.34, + "learning_rate": 4.838844660363985e-05, + "loss": 0.9523, + "step": 6445 + }, + { + "epoch": 0.35, + "learning_rate": 4.838597204865625e-05, + "loss": 0.8926, + "step": 6450 + }, + { + "epoch": 0.35, + "learning_rate": 4.838349565865129e-05, + "loss": 0.7616, + "step": 6455 + }, + { + "epoch": 0.35, + "learning_rate": 4.8381017433819284e-05, + "loss": 0.7135, + "step": 6460 + }, + { + "epoch": 0.35, + "learning_rate": 4.837853737435471e-05, + "loss": 0.79, + "step": 6465 + }, + { + "epoch": 0.35, + "learning_rate": 4.8376055480452134e-05, + "loss": 0.7665, + "step": 6470 + }, + { + "epoch": 0.35, + "learning_rate": 4.8373571752306327e-05, + "loss": 0.9513, + "step": 6475 + }, + { + "epoch": 0.35, + "learning_rate": 4.837108619011217e-05, + "loss": 0.7946, + "step": 6480 + }, + { + "epoch": 0.35, + "learning_rate": 4.83685987940647e-05, + "loss": 0.7588, + "step": 6485 + }, + { + "epoch": 0.35, + "learning_rate": 4.8366109564359095e-05, + "loss": 0.8488, + "step": 6490 + }, + { + "epoch": 0.35, + "learning_rate": 4.836361850119069e-05, + "loss": 0.7705, + "step": 6495 + }, + { + "epoch": 0.35, + "learning_rate": 4.836112560475492e-05, + "loss": 0.8837, + "step": 6500 + }, + { + "epoch": 0.35, + "learning_rate": 4.835863087524742e-05, + "loss": 0.7212, + "step": 6505 + }, + { + "epoch": 0.35, + "learning_rate": 4.835613431286394e-05, + "loss": 0.8228, + "step": 6510 + }, + { + "epoch": 0.35, + "learning_rate": 4.835363591780037e-05, + "loss": 0.8566, + "step": 6515 + }, + { + "epoch": 0.35, + "learning_rate": 4.835113569025275e-05, + "loss": 0.7953, + "step": 6520 + }, + { + "epoch": 0.35, + "learning_rate": 4.8348633630417265e-05, + "loss": 0.8232, + "step": 6525 + }, + { + "epoch": 0.35, + "learning_rate": 4.8346129738490256e-05, + "loss": 0.8565, + "step": 6530 + }, + { + "epoch": 0.35, + "learning_rate": 4.834362401466818e-05, + "loss": 0.8472, + "step": 6535 + }, + { + "epoch": 0.35, + "learning_rate": 4.834111645914766e-05, + "loss": 0.8275, + "step": 6540 + }, + { + "epoch": 0.35, + "learning_rate": 4.833860707212545e-05, + "loss": 0.7908, + "step": 6545 + }, + { + "epoch": 0.35, + "learning_rate": 4.833609585379847e-05, + "loss": 0.7838, + "step": 6550 + }, + { + "epoch": 0.35, + "learning_rate": 4.8333582804363756e-05, + "loss": 0.7953, + "step": 6555 + }, + { + "epoch": 0.35, + "learning_rate": 4.833106792401848e-05, + "loss": 0.7829, + "step": 6560 + }, + { + "epoch": 0.35, + "learning_rate": 4.832855121296001e-05, + "loss": 0.8694, + "step": 6565 + }, + { + "epoch": 0.35, + "learning_rate": 4.8326032671385804e-05, + "loss": 0.7641, + "step": 6570 + }, + { + "epoch": 0.35, + "learning_rate": 4.8323512299493495e-05, + "loss": 0.8141, + "step": 6575 + }, + { + "epoch": 0.35, + "learning_rate": 4.8320990097480834e-05, + "loss": 0.7804, + "step": 6580 + }, + { + "epoch": 0.35, + "learning_rate": 4.831846606554574e-05, + "loss": 0.8158, + "step": 6585 + }, + { + "epoch": 0.35, + "learning_rate": 4.831594020388627e-05, + "loss": 0.9951, + "step": 6590 + }, + { + "epoch": 0.35, + "learning_rate": 4.83134125127006e-05, + "loss": 0.8265, + "step": 6595 + }, + { + "epoch": 0.35, + "learning_rate": 4.83108829921871e-05, + "loss": 0.7968, + "step": 6600 + }, + { + "epoch": 0.35, + "learning_rate": 4.830835164254424e-05, + "loss": 0.7983, + "step": 6605 + }, + { + "epoch": 0.35, + "learning_rate": 4.830581846397063e-05, + "loss": 0.7525, + "step": 6610 + }, + { + "epoch": 0.35, + "learning_rate": 4.8303283456665056e-05, + "loss": 0.8614, + "step": 6615 + }, + { + "epoch": 0.35, + "learning_rate": 4.830074662082643e-05, + "loss": 0.7994, + "step": 6620 + }, + { + "epoch": 0.35, + "learning_rate": 4.8298207956653814e-05, + "loss": 0.7816, + "step": 6625 + }, + { + "epoch": 0.35, + "learning_rate": 4.82956674643464e-05, + "loss": 0.9112, + "step": 6630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8293125144103535e-05, + "loss": 0.7195, + "step": 6635 + }, + { + "epoch": 0.36, + "learning_rate": 4.829058099612471e-05, + "loss": 0.8317, + "step": 6640 + }, + { + "epoch": 0.36, + "learning_rate": 4.828803502060956e-05, + "loss": 0.7947, + "step": 6645 + }, + { + "epoch": 0.36, + "learning_rate": 4.828548721775784e-05, + "loss": 0.7425, + "step": 6650 + }, + { + "epoch": 0.36, + "learning_rate": 4.828293758776949e-05, + "loss": 0.8062, + "step": 6655 + }, + { + "epoch": 0.36, + "learning_rate": 4.8280386130844555e-05, + "loss": 0.8531, + "step": 6660 + }, + { + "epoch": 0.36, + "learning_rate": 4.827783284718326e-05, + "loss": 0.87, + "step": 6665 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275277736985924e-05, + "loss": 0.773, + "step": 6670 + }, + { + "epoch": 0.36, + "learning_rate": 4.8272720800453065e-05, + "loss": 0.8636, + "step": 6675 + }, + { + "epoch": 0.36, + "learning_rate": 4.82701620377853e-05, + "loss": 0.8619, + "step": 6680 + }, + { + "epoch": 0.36, + "learning_rate": 4.8267601449183416e-05, + "loss": 0.7709, + "step": 6685 + }, + { + "epoch": 0.36, + "learning_rate": 4.8265039034848324e-05, + "loss": 0.7648, + "step": 6690 + }, + { + "epoch": 0.36, + "learning_rate": 4.82624747949811e-05, + "loss": 0.8286, + "step": 6695 + }, + { + "epoch": 0.36, + "learning_rate": 4.825990872978296e-05, + "loss": 0.8407, + "step": 6700 + }, + { + "epoch": 0.36, + "learning_rate": 4.825734083945523e-05, + "loss": 0.847, + "step": 6705 + }, + { + "epoch": 0.36, + "learning_rate": 4.8254771124199416e-05, + "loss": 0.845, + "step": 6710 + }, + { + "epoch": 0.36, + "learning_rate": 4.825219958421715e-05, + "loss": 0.8051, + "step": 6715 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249626219710234e-05, + "loss": 0.757, + "step": 6720 + }, + { + "epoch": 0.36, + "learning_rate": 4.8247051030880565e-05, + "loss": 0.6884, + "step": 6725 + }, + { + "epoch": 0.36, + "learning_rate": 4.8244474017930215e-05, + "loss": 0.9335, + "step": 6730 + }, + { + "epoch": 0.36, + "learning_rate": 4.82418951810614e-05, + "loss": 0.7192, + "step": 6735 + }, + { + "epoch": 0.36, + "learning_rate": 4.823931452047647e-05, + "loss": 0.8617, + "step": 6740 + }, + { + "epoch": 0.36, + "learning_rate": 4.823673203637794e-05, + "loss": 0.8135, + "step": 6745 + }, + { + "epoch": 0.36, + "learning_rate": 4.8234147728968414e-05, + "loss": 0.853, + "step": 6750 + }, + { + "epoch": 0.36, + "learning_rate": 4.8231561598450706e-05, + "loss": 0.7494, + "step": 6755 + }, + { + "epoch": 0.36, + "learning_rate": 4.8228973645027713e-05, + "loss": 0.7688, + "step": 6760 + }, + { + "epoch": 0.36, + "learning_rate": 4.822638386890253e-05, + "loss": 0.7991, + "step": 6765 + }, + { + "epoch": 0.36, + "learning_rate": 4.822379227027834e-05, + "loss": 0.826, + "step": 6770 + }, + { + "epoch": 0.36, + "learning_rate": 4.8221198849358527e-05, + "loss": 0.8499, + "step": 6775 + }, + { + "epoch": 0.36, + "learning_rate": 4.821860360634657e-05, + "loss": 0.7452, + "step": 6780 + }, + { + "epoch": 0.36, + "learning_rate": 4.8216006541446117e-05, + "loss": 0.7455, + "step": 6785 + }, + { + "epoch": 0.36, + "learning_rate": 4.8213407654860945e-05, + "loss": 0.7513, + "step": 6790 + }, + { + "epoch": 0.36, + "learning_rate": 4.821080694679498e-05, + "loss": 0.8145, + "step": 6795 + }, + { + "epoch": 0.36, + "learning_rate": 4.820820441745231e-05, + "loss": 0.8254, + "step": 6800 + }, + { + "epoch": 0.36, + "learning_rate": 4.820560006703711e-05, + "loss": 0.7937, + "step": 6805 + }, + { + "epoch": 0.36, + "learning_rate": 4.820299389575378e-05, + "loss": 0.7425, + "step": 6810 + }, + { + "epoch": 0.36, + "learning_rate": 4.820038590380678e-05, + "loss": 0.8957, + "step": 6815 + }, + { + "epoch": 0.36, + "learning_rate": 4.8197776091400766e-05, + "loss": 0.8821, + "step": 6820 + }, + { + "epoch": 0.37, + "learning_rate": 4.819516445874052e-05, + "loss": 0.7784, + "step": 6825 + }, + { + "epoch": 0.37, + "learning_rate": 4.819255100603097e-05, + "loss": 0.618, + "step": 6830 + }, + { + "epoch": 0.37, + "learning_rate": 4.818993573347719e-05, + "loss": 0.8112, + "step": 6835 + }, + { + "epoch": 0.37, + "learning_rate": 4.8187318641284376e-05, + "loss": 0.9088, + "step": 6840 + }, + { + "epoch": 0.37, + "learning_rate": 4.8184699729657895e-05, + "loss": 0.8004, + "step": 6845 + }, + { + "epoch": 0.37, + "learning_rate": 4.818207899880325e-05, + "loss": 0.8575, + "step": 6850 + }, + { + "epoch": 0.37, + "learning_rate": 4.817945644892606e-05, + "loss": 0.7702, + "step": 6855 + }, + { + "epoch": 0.37, + "learning_rate": 4.817683208023213e-05, + "loss": 0.9045, + "step": 6860 + }, + { + "epoch": 0.37, + "learning_rate": 4.817420589292738e-05, + "loss": 0.8171, + "step": 6865 + }, + { + "epoch": 0.37, + "learning_rate": 4.8171577887217866e-05, + "loss": 0.9869, + "step": 6870 + }, + { + "epoch": 0.37, + "learning_rate": 4.8168948063309814e-05, + "loss": 0.8393, + "step": 6875 + }, + { + "epoch": 0.37, + "learning_rate": 4.8166316421409574e-05, + "loss": 0.8222, + "step": 6880 + }, + { + "epoch": 0.37, + "learning_rate": 4.8163682961723646e-05, + "loss": 0.8546, + "step": 6885 + }, + { + "epoch": 0.37, + "learning_rate": 4.816104768445865e-05, + "loss": 0.8804, + "step": 6890 + }, + { + "epoch": 0.37, + "learning_rate": 4.815841058982139e-05, + "loss": 0.9524, + "step": 6895 + }, + { + "epoch": 0.37, + "learning_rate": 4.815577167801878e-05, + "loss": 0.7327, + "step": 6900 + }, + { + "epoch": 0.37, + "learning_rate": 4.8153130949257884e-05, + "loss": 0.9141, + "step": 6905 + }, + { + "epoch": 0.37, + "learning_rate": 4.8150488403745925e-05, + "loss": 0.8751, + "step": 6910 + }, + { + "epoch": 0.37, + "learning_rate": 4.8147844041690244e-05, + "loss": 0.7618, + "step": 6915 + }, + { + "epoch": 0.37, + "learning_rate": 4.814519786329833e-05, + "loss": 0.6011, + "step": 6920 + }, + { + "epoch": 0.37, + "learning_rate": 4.814254986877784e-05, + "loss": 0.7932, + "step": 6925 + }, + { + "epoch": 0.37, + "learning_rate": 4.813990005833653e-05, + "loss": 0.8013, + "step": 6930 + }, + { + "epoch": 0.37, + "learning_rate": 4.8137248432182334e-05, + "loss": 0.9176, + "step": 6935 + }, + { + "epoch": 0.37, + "learning_rate": 4.813459499052332e-05, + "loss": 0.8428, + "step": 6940 + }, + { + "epoch": 0.37, + "learning_rate": 4.8131939733567686e-05, + "loss": 0.8801, + "step": 6945 + }, + { + "epoch": 0.37, + "learning_rate": 4.812928266152379e-05, + "loss": 0.9083, + "step": 6950 + }, + { + "epoch": 0.37, + "learning_rate": 4.812662377460012e-05, + "loss": 0.6987, + "step": 6955 + }, + { + "epoch": 0.37, + "learning_rate": 4.812396307300531e-05, + "loss": 0.8763, + "step": 6960 + }, + { + "epoch": 0.37, + "learning_rate": 4.812130055694813e-05, + "loss": 0.7517, + "step": 6965 + }, + { + "epoch": 0.37, + "learning_rate": 4.811863622663752e-05, + "loss": 0.9541, + "step": 6970 + }, + { + "epoch": 0.37, + "learning_rate": 4.811597008228251e-05, + "loss": 0.6921, + "step": 6975 + }, + { + "epoch": 0.37, + "learning_rate": 4.811330212409233e-05, + "loss": 0.6888, + "step": 6980 + }, + { + "epoch": 0.37, + "learning_rate": 4.811063235227632e-05, + "loss": 0.8537, + "step": 6985 + }, + { + "epoch": 0.37, + "learning_rate": 4.810796076704396e-05, + "loss": 0.7204, + "step": 6990 + }, + { + "epoch": 0.37, + "learning_rate": 4.810528736860488e-05, + "loss": 0.6506, + "step": 6995 + }, + { + "epoch": 0.37, + "learning_rate": 4.810261215716887e-05, + "loss": 0.6871, + "step": 7000 + }, + { + "epoch": 0.37, + "learning_rate": 4.8099935132945825e-05, + "loss": 0.7543, + "step": 7005 + }, + { + "epoch": 0.38, + "learning_rate": 4.8097256296145816e-05, + "loss": 0.8341, + "step": 7010 + }, + { + "epoch": 0.38, + "learning_rate": 4.8094575646979036e-05, + "loss": 0.8501, + "step": 7015 + }, + { + "epoch": 0.38, + "learning_rate": 4.8091893185655826e-05, + "loss": 0.8359, + "step": 7020 + }, + { + "epoch": 0.38, + "learning_rate": 4.808920891238667e-05, + "loss": 0.8277, + "step": 7025 + }, + { + "epoch": 0.38, + "learning_rate": 4.8086522827382204e-05, + "loss": 0.7674, + "step": 7030 + }, + { + "epoch": 0.38, + "learning_rate": 4.808383493085319e-05, + "loss": 0.7716, + "step": 7035 + }, + { + "epoch": 0.38, + "learning_rate": 4.808114522301054e-05, + "loss": 0.7889, + "step": 7040 + }, + { + "epoch": 0.38, + "learning_rate": 4.8078453704065294e-05, + "loss": 0.788, + "step": 7045 + }, + { + "epoch": 0.38, + "learning_rate": 4.8075760374228665e-05, + "loss": 0.7184, + "step": 7050 + }, + { + "epoch": 0.38, + "learning_rate": 4.807306523371198e-05, + "loss": 0.742, + "step": 7055 + }, + { + "epoch": 0.38, + "learning_rate": 4.807036828272672e-05, + "loss": 0.8931, + "step": 7060 + }, + { + "epoch": 0.38, + "learning_rate": 4.80676695214845e-05, + "loss": 0.835, + "step": 7065 + }, + { + "epoch": 0.38, + "learning_rate": 4.8064968950197095e-05, + "loss": 0.8265, + "step": 7070 + }, + { + "epoch": 0.38, + "learning_rate": 4.8062266569076405e-05, + "loss": 0.8526, + "step": 7075 + }, + { + "epoch": 0.38, + "learning_rate": 4.8059562378334475e-05, + "loss": 0.7497, + "step": 7080 + }, + { + "epoch": 0.38, + "learning_rate": 4.805685637818349e-05, + "loss": 0.7826, + "step": 7085 + }, + { + "epoch": 0.38, + "learning_rate": 4.8054148568835786e-05, + "loss": 0.842, + "step": 7090 + }, + { + "epoch": 0.38, + "learning_rate": 4.8051438950503845e-05, + "loss": 0.8569, + "step": 7095 + }, + { + "epoch": 0.38, + "learning_rate": 4.804872752340027e-05, + "loss": 0.887, + "step": 7100 + }, + { + "epoch": 0.38, + "learning_rate": 4.804601428773782e-05, + "loss": 0.7942, + "step": 7105 + }, + { + "epoch": 0.38, + "learning_rate": 4.80432992437294e-05, + "loss": 0.6507, + "step": 7110 + }, + { + "epoch": 0.38, + "learning_rate": 4.804058239158804e-05, + "loss": 0.9072, + "step": 7115 + }, + { + "epoch": 0.38, + "learning_rate": 4.803786373152692e-05, + "loss": 0.6998, + "step": 7120 + }, + { + "epoch": 0.38, + "learning_rate": 4.8035143263759386e-05, + "loss": 0.712, + "step": 7125 + }, + { + "epoch": 0.38, + "learning_rate": 4.803242098849889e-05, + "loss": 0.8511, + "step": 7130 + }, + { + "epoch": 0.38, + "learning_rate": 4.802969690595903e-05, + "loss": 0.8195, + "step": 7135 + }, + { + "epoch": 0.38, + "learning_rate": 4.8026971016353584e-05, + "loss": 0.6801, + "step": 7140 + }, + { + "epoch": 0.38, + "learning_rate": 4.802424331989641e-05, + "loss": 0.8112, + "step": 7145 + }, + { + "epoch": 0.38, + "learning_rate": 4.8021513816801565e-05, + "loss": 0.844, + "step": 7150 + }, + { + "epoch": 0.38, + "learning_rate": 4.801878250728322e-05, + "loss": 0.8558, + "step": 7155 + }, + { + "epoch": 0.38, + "learning_rate": 4.801604939155569e-05, + "loss": 0.7182, + "step": 7160 + }, + { + "epoch": 0.38, + "learning_rate": 4.801331446983342e-05, + "loss": 0.814, + "step": 7165 + }, + { + "epoch": 0.38, + "learning_rate": 4.801057774233104e-05, + "loss": 0.877, + "step": 7170 + }, + { + "epoch": 0.38, + "learning_rate": 4.8007839209263276e-05, + "loss": 0.8308, + "step": 7175 + }, + { + "epoch": 0.38, + "learning_rate": 4.8005098870845e-05, + "loss": 0.7684, + "step": 7180 + }, + { + "epoch": 0.38, + "learning_rate": 4.800235672729125e-05, + "loss": 0.8682, + "step": 7185 + }, + { + "epoch": 0.38, + "learning_rate": 4.7999612778817196e-05, + "loss": 0.9712, + "step": 7190 + }, + { + "epoch": 0.38, + "learning_rate": 4.799686702563814e-05, + "loss": 0.788, + "step": 7195 + }, + { + "epoch": 0.39, + "learning_rate": 4.799411946796954e-05, + "loss": 0.8992, + "step": 7200 + }, + { + "epoch": 0.39, + "learning_rate": 4.799137010602698e-05, + "loss": 0.9255, + "step": 7205 + }, + { + "epoch": 0.39, + "learning_rate": 4.79886189400262e-05, + "loss": 0.6577, + "step": 7210 + }, + { + "epoch": 0.39, + "learning_rate": 4.798586597018306e-05, + "loss": 0.6142, + "step": 7215 + }, + { + "epoch": 0.39, + "learning_rate": 4.7983111196713605e-05, + "loss": 0.7263, + "step": 7220 + }, + { + "epoch": 0.39, + "learning_rate": 4.798035461983397e-05, + "loss": 0.8851, + "step": 7225 + }, + { + "epoch": 0.39, + "learning_rate": 4.797759623976045e-05, + "loss": 0.8105, + "step": 7230 + }, + { + "epoch": 0.39, + "learning_rate": 4.797483605670951e-05, + "loss": 0.8346, + "step": 7235 + }, + { + "epoch": 0.39, + "learning_rate": 4.797207407089772e-05, + "loss": 0.7503, + "step": 7240 + }, + { + "epoch": 0.39, + "learning_rate": 4.7969310282541794e-05, + "loss": 0.8372, + "step": 7245 + }, + { + "epoch": 0.39, + "learning_rate": 4.7966544691858615e-05, + "loss": 0.8527, + "step": 7250 + }, + { + "epoch": 0.39, + "learning_rate": 4.796377729906518e-05, + "loss": 0.8144, + "step": 7255 + }, + { + "epoch": 0.39, + "learning_rate": 4.796100810437864e-05, + "loss": 0.8474, + "step": 7260 + }, + { + "epoch": 0.39, + "learning_rate": 4.795823710801628e-05, + "loss": 0.6952, + "step": 7265 + }, + { + "epoch": 0.39, + "learning_rate": 4.795546431019555e-05, + "loss": 0.9214, + "step": 7270 + }, + { + "epoch": 0.39, + "learning_rate": 4.7952689711133994e-05, + "loss": 0.7239, + "step": 7275 + }, + { + "epoch": 0.39, + "learning_rate": 4.7949913311049345e-05, + "loss": 0.9013, + "step": 7280 + }, + { + "epoch": 0.39, + "learning_rate": 4.794713511015945e-05, + "loss": 0.9118, + "step": 7285 + }, + { + "epoch": 0.39, + "learning_rate": 4.794435510868231e-05, + "loss": 0.7586, + "step": 7290 + }, + { + "epoch": 0.39, + "learning_rate": 4.794157330683606e-05, + "loss": 0.6804, + "step": 7295 + }, + { + "epoch": 0.39, + "learning_rate": 4.7938789704838984e-05, + "loss": 0.8301, + "step": 7300 + }, + { + "epoch": 0.39, + "learning_rate": 4.7936004302909486e-05, + "loss": 0.9335, + "step": 7305 + }, + { + "epoch": 0.39, + "learning_rate": 4.793321710126615e-05, + "loss": 0.8204, + "step": 7310 + }, + { + "epoch": 0.39, + "learning_rate": 4.7930428100127664e-05, + "loss": 0.8582, + "step": 7315 + }, + { + "epoch": 0.39, + "learning_rate": 4.792763729971288e-05, + "loss": 0.7994, + "step": 7320 + }, + { + "epoch": 0.39, + "learning_rate": 4.792484470024078e-05, + "loss": 0.662, + "step": 7325 + }, + { + "epoch": 0.39, + "learning_rate": 4.792205030193049e-05, + "loss": 0.7738, + "step": 7330 + }, + { + "epoch": 0.39, + "learning_rate": 4.791925410500127e-05, + "loss": 0.8079, + "step": 7335 + }, + { + "epoch": 0.39, + "learning_rate": 4.791645610967254e-05, + "loss": 0.71, + "step": 7340 + }, + { + "epoch": 0.39, + "learning_rate": 4.791365631616384e-05, + "loss": 0.6819, + "step": 7345 + }, + { + "epoch": 0.39, + "learning_rate": 4.7910854724694864e-05, + "loss": 0.7996, + "step": 7350 + }, + { + "epoch": 0.39, + "learning_rate": 4.790805133548545e-05, + "loss": 0.8071, + "step": 7355 + }, + { + "epoch": 0.39, + "learning_rate": 4.790524614875557e-05, + "loss": 0.8915, + "step": 7360 + }, + { + "epoch": 0.39, + "learning_rate": 4.7902439164725334e-05, + "loss": 0.9415, + "step": 7365 + }, + { + "epoch": 0.39, + "learning_rate": 4.789963038361499e-05, + "loss": 0.854, + "step": 7370 + }, + { + "epoch": 0.39, + "learning_rate": 4.789681980564494e-05, + "loss": 0.716, + "step": 7375 + }, + { + "epoch": 0.39, + "learning_rate": 4.7894007431035726e-05, + "loss": 0.7333, + "step": 7380 + }, + { + "epoch": 0.4, + "learning_rate": 4.7891193260008026e-05, + "loss": 0.7083, + "step": 7385 + }, + { + "epoch": 0.4, + "learning_rate": 4.788837729278265e-05, + "loss": 0.7627, + "step": 7390 + }, + { + "epoch": 0.4, + "learning_rate": 4.788555952958056e-05, + "loss": 0.9727, + "step": 7395 + }, + { + "epoch": 0.4, + "learning_rate": 4.788273997062286e-05, + "loss": 0.9049, + "step": 7400 + }, + { + "epoch": 0.4, + "learning_rate": 4.7879918616130795e-05, + "loss": 0.9316, + "step": 7405 + }, + { + "epoch": 0.4, + "learning_rate": 4.7877095466325745e-05, + "loss": 0.8453, + "step": 7410 + }, + { + "epoch": 0.4, + "learning_rate": 4.787427052142923e-05, + "loss": 0.8353, + "step": 7415 + }, + { + "epoch": 0.4, + "learning_rate": 4.787144378166291e-05, + "loss": 0.8048, + "step": 7420 + }, + { + "epoch": 0.4, + "learning_rate": 4.7868615247248604e-05, + "loss": 0.8047, + "step": 7425 + }, + { + "epoch": 0.4, + "learning_rate": 4.786578491840825e-05, + "loss": 0.7862, + "step": 7430 + }, + { + "epoch": 0.4, + "learning_rate": 4.7862952795363925e-05, + "loss": 0.7956, + "step": 7435 + }, + { + "epoch": 0.4, + "learning_rate": 4.786011887833788e-05, + "loss": 0.7657, + "step": 7440 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857283167552455e-05, + "loss": 0.8208, + "step": 7445 + }, + { + "epoch": 0.4, + "learning_rate": 4.785444566323018e-05, + "loss": 0.9138, + "step": 7450 + }, + { + "epoch": 0.4, + "learning_rate": 4.78516063655937e-05, + "loss": 0.7994, + "step": 7455 + }, + { + "epoch": 0.4, + "learning_rate": 4.78487652748658e-05, + "loss": 0.7435, + "step": 7460 + }, + { + "epoch": 0.4, + "learning_rate": 4.7845922391269423e-05, + "loss": 0.7738, + "step": 7465 + }, + { + "epoch": 0.4, + "learning_rate": 4.7843077715027626e-05, + "loss": 0.7367, + "step": 7470 + }, + { + "epoch": 0.4, + "learning_rate": 4.7840231246363634e-05, + "loss": 0.831, + "step": 7475 + }, + { + "epoch": 0.4, + "learning_rate": 4.783738298550079e-05, + "loss": 0.7333, + "step": 7480 + }, + { + "epoch": 0.4, + "learning_rate": 4.783453293266259e-05, + "loss": 0.8165, + "step": 7485 + }, + { + "epoch": 0.4, + "learning_rate": 4.7831681088072676e-05, + "loss": 0.7677, + "step": 7490 + }, + { + "epoch": 0.4, + "learning_rate": 4.782882745195482e-05, + "loss": 0.72, + "step": 7495 + }, + { + "epoch": 0.4, + "learning_rate": 4.782597202453293e-05, + "loss": 0.6487, + "step": 7500 + }, + { + "epoch": 0.4, + "learning_rate": 4.782311480603107e-05, + "loss": 0.8733, + "step": 7505 + }, + { + "epoch": 0.4, + "learning_rate": 4.782025579667343e-05, + "loss": 0.7825, + "step": 7510 + }, + { + "epoch": 0.4, + "learning_rate": 4.7817394996684354e-05, + "loss": 0.7005, + "step": 7515 + }, + { + "epoch": 0.4, + "learning_rate": 4.781453240628831e-05, + "loss": 0.7942, + "step": 7520 + }, + { + "epoch": 0.4, + "learning_rate": 4.781166802570994e-05, + "loss": 0.8163, + "step": 7525 + }, + { + "epoch": 0.4, + "learning_rate": 4.7808801855173976e-05, + "loss": 0.9127, + "step": 7530 + }, + { + "epoch": 0.4, + "learning_rate": 4.7805933894905324e-05, + "loss": 0.8118, + "step": 7535 + }, + { + "epoch": 0.4, + "learning_rate": 4.780306414512903e-05, + "loss": 0.7944, + "step": 7540 + }, + { + "epoch": 0.4, + "learning_rate": 4.780019260607027e-05, + "loss": 0.7226, + "step": 7545 + }, + { + "epoch": 0.4, + "learning_rate": 4.7797319277954366e-05, + "loss": 0.8094, + "step": 7550 + }, + { + "epoch": 0.4, + "learning_rate": 4.779444416100677e-05, + "loss": 0.7522, + "step": 7555 + }, + { + "epoch": 0.4, + "learning_rate": 4.7791567255453104e-05, + "loss": 0.8632, + "step": 7560 + }, + { + "epoch": 0.4, + "learning_rate": 4.7788688561519085e-05, + "loss": 0.6781, + "step": 7565 + }, + { + "epoch": 0.4, + "learning_rate": 4.778580807943061e-05, + "loss": 0.8259, + "step": 7570 + }, + { + "epoch": 0.41, + "learning_rate": 4.7782925809413695e-05, + "loss": 0.6863, + "step": 7575 + }, + { + "epoch": 0.41, + "learning_rate": 4.77800417516945e-05, + "loss": 0.8084, + "step": 7580 + }, + { + "epoch": 0.41, + "learning_rate": 4.777715590649935e-05, + "loss": 0.7288, + "step": 7585 + }, + { + "epoch": 0.41, + "learning_rate": 4.777426827405465e-05, + "loss": 0.7919, + "step": 7590 + }, + { + "epoch": 0.41, + "learning_rate": 4.777137885458701e-05, + "loss": 0.7569, + "step": 7595 + }, + { + "epoch": 0.41, + "learning_rate": 4.7768487648323144e-05, + "loss": 0.7948, + "step": 7600 + }, + { + "epoch": 0.41, + "learning_rate": 4.776559465548992e-05, + "loss": 0.8371, + "step": 7605 + }, + { + "epoch": 0.41, + "learning_rate": 4.776269987631434e-05, + "loss": 0.7119, + "step": 7610 + }, + { + "epoch": 0.41, + "learning_rate": 4.775980331102356e-05, + "loss": 0.7255, + "step": 7615 + }, + { + "epoch": 0.41, + "learning_rate": 4.7756904959844835e-05, + "loss": 0.7239, + "step": 7620 + }, + { + "epoch": 0.41, + "learning_rate": 4.775400482300561e-05, + "loss": 0.8782, + "step": 7625 + }, + { + "epoch": 0.41, + "learning_rate": 4.775110290073344e-05, + "loss": 0.9146, + "step": 7630 + }, + { + "epoch": 0.41, + "learning_rate": 4.774819919325605e-05, + "loss": 0.7668, + "step": 7635 + }, + { + "epoch": 0.41, + "learning_rate": 4.774529370080125e-05, + "loss": 0.9005, + "step": 7640 + }, + { + "epoch": 0.41, + "learning_rate": 4.7742386423597064e-05, + "loss": 0.8589, + "step": 7645 + }, + { + "epoch": 0.41, + "learning_rate": 4.773947736187158e-05, + "loss": 0.7268, + "step": 7650 + }, + { + "epoch": 0.41, + "learning_rate": 4.773656651585309e-05, + "loss": 0.806, + "step": 7655 + }, + { + "epoch": 0.41, + "learning_rate": 4.773365388576998e-05, + "loss": 0.8754, + "step": 7660 + }, + { + "epoch": 0.41, + "learning_rate": 4.77307394718508e-05, + "loss": 0.6792, + "step": 7665 + }, + { + "epoch": 0.41, + "learning_rate": 4.772782327432425e-05, + "loss": 0.7556, + "step": 7670 + }, + { + "epoch": 0.41, + "learning_rate": 4.772490529341913e-05, + "loss": 0.8999, + "step": 7675 + }, + { + "epoch": 0.41, + "learning_rate": 4.772198552936442e-05, + "loss": 0.7984, + "step": 7680 + }, + { + "epoch": 0.41, + "learning_rate": 4.771906398238922e-05, + "loss": 0.6748, + "step": 7685 + }, + { + "epoch": 0.41, + "learning_rate": 4.7716140652722774e-05, + "loss": 0.9188, + "step": 7690 + }, + { + "epoch": 0.41, + "learning_rate": 4.771321554059446e-05, + "loss": 0.7875, + "step": 7695 + }, + { + "epoch": 0.41, + "learning_rate": 4.771028864623382e-05, + "loss": 0.7906, + "step": 7700 + }, + { + "epoch": 0.41, + "learning_rate": 4.77073599698705e-05, + "loss": 0.8845, + "step": 7705 + }, + { + "epoch": 0.41, + "learning_rate": 4.7704429511734314e-05, + "loss": 0.793, + "step": 7710 + }, + { + "epoch": 0.41, + "learning_rate": 4.770149727205521e-05, + "loss": 0.7802, + "step": 7715 + }, + { + "epoch": 0.41, + "learning_rate": 4.769856325106325e-05, + "loss": 0.9725, + "step": 7720 + }, + { + "epoch": 0.41, + "learning_rate": 4.769562744898868e-05, + "loss": 0.8408, + "step": 7725 + }, + { + "epoch": 0.41, + "learning_rate": 4.769268986606185e-05, + "loss": 0.8148, + "step": 7730 + }, + { + "epoch": 0.41, + "learning_rate": 4.7689750502513255e-05, + "loss": 0.9142, + "step": 7735 + }, + { + "epoch": 0.41, + "learning_rate": 4.768680935857356e-05, + "loss": 0.8864, + "step": 7740 + }, + { + "epoch": 0.41, + "learning_rate": 4.7683866434473536e-05, + "loss": 0.7944, + "step": 7745 + }, + { + "epoch": 0.41, + "learning_rate": 4.76809217304441e-05, + "loss": 0.8669, + "step": 7750 + }, + { + "epoch": 0.41, + "learning_rate": 4.767797524671632e-05, + "loss": 0.8018, + "step": 7755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7675026983521395e-05, + "loss": 0.7345, + "step": 7760 + }, + { + "epoch": 0.42, + "learning_rate": 4.767207694109066e-05, + "loss": 0.6435, + "step": 7765 + }, + { + "epoch": 0.42, + "learning_rate": 4.7669125119655604e-05, + "loss": 0.7386, + "step": 7770 + }, + { + "epoch": 0.42, + "learning_rate": 4.766617151944784e-05, + "loss": 0.7342, + "step": 7775 + }, + { + "epoch": 0.42, + "learning_rate": 4.7663216140699144e-05, + "loss": 0.8328, + "step": 7780 + }, + { + "epoch": 0.42, + "learning_rate": 4.7660258983641384e-05, + "loss": 0.8689, + "step": 7785 + }, + { + "epoch": 0.42, + "learning_rate": 4.7657300048506624e-05, + "loss": 0.9127, + "step": 7790 + }, + { + "epoch": 0.42, + "learning_rate": 4.765433933552703e-05, + "loss": 0.8793, + "step": 7795 + }, + { + "epoch": 0.42, + "learning_rate": 4.765137684493493e-05, + "loss": 0.8439, + "step": 7800 + }, + { + "epoch": 0.42, + "learning_rate": 4.7648412576962776e-05, + "loss": 0.6499, + "step": 7805 + }, + { + "epoch": 0.42, + "learning_rate": 4.764544653184316e-05, + "loss": 0.7619, + "step": 7810 + }, + { + "epoch": 0.42, + "learning_rate": 4.764247870980882e-05, + "loss": 0.9041, + "step": 7815 + }, + { + "epoch": 0.42, + "learning_rate": 4.763950911109263e-05, + "loss": 0.7256, + "step": 7820 + }, + { + "epoch": 0.42, + "learning_rate": 4.7636537735927613e-05, + "loss": 0.8597, + "step": 7825 + }, + { + "epoch": 0.42, + "learning_rate": 4.7633564584546916e-05, + "loss": 0.8381, + "step": 7830 + }, + { + "epoch": 0.42, + "learning_rate": 4.7630589657183835e-05, + "loss": 0.8253, + "step": 7835 + }, + { + "epoch": 0.42, + "learning_rate": 4.76276129540718e-05, + "loss": 0.723, + "step": 7840 + }, + { + "epoch": 0.42, + "learning_rate": 4.762463447544438e-05, + "loss": 0.8066, + "step": 7845 + }, + { + "epoch": 0.42, + "learning_rate": 4.7621654221535293e-05, + "loss": 0.809, + "step": 7850 + }, + { + "epoch": 0.42, + "learning_rate": 4.76186721925784e-05, + "loss": 0.7983, + "step": 7855 + }, + { + "epoch": 0.42, + "learning_rate": 4.7615688388807665e-05, + "loss": 0.7596, + "step": 7860 + }, + { + "epoch": 0.42, + "learning_rate": 4.761270281045724e-05, + "loss": 0.9377, + "step": 7865 + }, + { + "epoch": 0.42, + "learning_rate": 4.7609715457761386e-05, + "loss": 0.7259, + "step": 7870 + }, + { + "epoch": 0.42, + "learning_rate": 4.760672633095451e-05, + "loss": 0.8541, + "step": 7875 + }, + { + "epoch": 0.42, + "learning_rate": 4.760373543027116e-05, + "loss": 0.9423, + "step": 7880 + }, + { + "epoch": 0.42, + "learning_rate": 4.760074275594602e-05, + "loss": 0.7212, + "step": 7885 + }, + { + "epoch": 0.42, + "learning_rate": 4.759774830821392e-05, + "loss": 0.8465, + "step": 7890 + }, + { + "epoch": 0.42, + "learning_rate": 4.759475208730982e-05, + "loss": 0.987, + "step": 7895 + }, + { + "epoch": 0.42, + "learning_rate": 4.7591754093468834e-05, + "loss": 0.7307, + "step": 7900 + }, + { + "epoch": 0.42, + "learning_rate": 4.7588754326926184e-05, + "loss": 0.8346, + "step": 7905 + }, + { + "epoch": 0.42, + "learning_rate": 4.758575278791727e-05, + "loss": 0.7288, + "step": 7910 + }, + { + "epoch": 0.42, + "learning_rate": 4.7582749476677605e-05, + "loss": 0.7893, + "step": 7915 + }, + { + "epoch": 0.42, + "learning_rate": 4.7579744393442855e-05, + "loss": 0.8403, + "step": 7920 + }, + { + "epoch": 0.42, + "learning_rate": 4.757673753844881e-05, + "loss": 0.7984, + "step": 7925 + }, + { + "epoch": 0.42, + "learning_rate": 4.7573728911931424e-05, + "loss": 0.8746, + "step": 7930 + }, + { + "epoch": 0.42, + "learning_rate": 4.7570718514126764e-05, + "loss": 0.8429, + "step": 7935 + }, + { + "epoch": 0.42, + "learning_rate": 4.756770634527104e-05, + "loss": 0.8346, + "step": 7940 + }, + { + "epoch": 0.43, + "learning_rate": 4.7564692405600616e-05, + "loss": 0.8847, + "step": 7945 + }, + { + "epoch": 0.43, + "learning_rate": 4.7561676695351976e-05, + "loss": 0.6616, + "step": 7950 + }, + { + "epoch": 0.43, + "learning_rate": 4.755865921476177e-05, + "loss": 0.804, + "step": 7955 + }, + { + "epoch": 0.43, + "learning_rate": 4.7555639964066754e-05, + "loss": 0.7363, + "step": 7960 + }, + { + "epoch": 0.43, + "learning_rate": 4.755261894350385e-05, + "loss": 0.8548, + "step": 7965 + }, + { + "epoch": 0.43, + "learning_rate": 4.75495961533101e-05, + "loss": 0.8168, + "step": 7970 + }, + { + "epoch": 0.43, + "learning_rate": 4.75465715937227e-05, + "loss": 0.6345, + "step": 7975 + }, + { + "epoch": 0.43, + "learning_rate": 4.754354526497897e-05, + "loss": 0.9102, + "step": 7980 + }, + { + "epoch": 0.43, + "learning_rate": 4.754051716731638e-05, + "loss": 0.7096, + "step": 7985 + }, + { + "epoch": 0.43, + "learning_rate": 4.7537487300972525e-05, + "loss": 0.7356, + "step": 7990 + }, + { + "epoch": 0.43, + "learning_rate": 4.7534455666185166e-05, + "loss": 0.688, + "step": 7995 + }, + { + "epoch": 0.43, + "learning_rate": 4.753142226319218e-05, + "loss": 0.689, + "step": 8000 + }, + { + "epoch": 0.43, + "learning_rate": 4.752838709223158e-05, + "loss": 0.7881, + "step": 8005 + }, + { + "epoch": 0.43, + "learning_rate": 4.7525350153541534e-05, + "loss": 0.9296, + "step": 8010 + }, + { + "epoch": 0.43, + "learning_rate": 4.7522311447360343e-05, + "loss": 0.8627, + "step": 8015 + }, + { + "epoch": 0.43, + "learning_rate": 4.7519270973926433e-05, + "loss": 0.793, + "step": 8020 + }, + { + "epoch": 0.43, + "learning_rate": 4.751622873347838e-05, + "loss": 0.7584, + "step": 8025 + }, + { + "epoch": 0.43, + "learning_rate": 4.751318472625492e-05, + "loss": 0.8536, + "step": 8030 + }, + { + "epoch": 0.43, + "learning_rate": 4.751013895249489e-05, + "loss": 0.8413, + "step": 8035 + }, + { + "epoch": 0.43, + "learning_rate": 4.7507091412437276e-05, + "loss": 0.8454, + "step": 8040 + }, + { + "epoch": 0.43, + "learning_rate": 4.750404210632122e-05, + "loss": 0.7701, + "step": 8045 + }, + { + "epoch": 0.43, + "learning_rate": 4.750099103438599e-05, + "loss": 0.8004, + "step": 8050 + }, + { + "epoch": 0.43, + "learning_rate": 4.7497938196871e-05, + "loss": 0.9889, + "step": 8055 + }, + { + "epoch": 0.43, + "learning_rate": 4.749488359401577e-05, + "loss": 0.7515, + "step": 8060 + }, + { + "epoch": 0.43, + "learning_rate": 4.7491827226060014e-05, + "loss": 0.9324, + "step": 8065 + }, + { + "epoch": 0.43, + "learning_rate": 4.7488769093243535e-05, + "loss": 0.8818, + "step": 8070 + }, + { + "epoch": 0.43, + "learning_rate": 4.748570919580631e-05, + "loss": 0.7374, + "step": 8075 + }, + { + "epoch": 0.43, + "learning_rate": 4.7482647533988435e-05, + "loss": 0.9266, + "step": 8080 + }, + { + "epoch": 0.43, + "learning_rate": 4.7479584108030136e-05, + "loss": 0.7565, + "step": 8085 + }, + { + "epoch": 0.43, + "learning_rate": 4.747651891817181e-05, + "loss": 0.8086, + "step": 8090 + }, + { + "epoch": 0.43, + "learning_rate": 4.747345196465396e-05, + "loss": 0.9391, + "step": 8095 + }, + { + "epoch": 0.43, + "learning_rate": 4.7470383247717244e-05, + "loss": 0.6459, + "step": 8100 + }, + { + "epoch": 0.43, + "learning_rate": 4.746731276760245e-05, + "loss": 0.7412, + "step": 8105 + }, + { + "epoch": 0.43, + "learning_rate": 4.746424052455052e-05, + "loss": 0.7047, + "step": 8110 + }, + { + "epoch": 0.43, + "learning_rate": 4.74611665188025e-05, + "loss": 0.7226, + "step": 8115 + }, + { + "epoch": 0.43, + "learning_rate": 4.745809075059963e-05, + "loss": 0.8196, + "step": 8120 + }, + { + "epoch": 0.43, + "learning_rate": 4.745501322018322e-05, + "loss": 0.5435, + "step": 8125 + }, + { + "epoch": 0.43, + "learning_rate": 4.7451933927794777e-05, + "loss": 0.8914, + "step": 8130 + }, + { + "epoch": 0.44, + "learning_rate": 4.744885287367592e-05, + "loss": 0.8608, + "step": 8135 + }, + { + "epoch": 0.44, + "learning_rate": 4.744577005806841e-05, + "loss": 0.859, + "step": 8140 + }, + { + "epoch": 0.44, + "learning_rate": 4.744268548121414e-05, + "loss": 0.8198, + "step": 8145 + }, + { + "epoch": 0.44, + "learning_rate": 4.743959914335515e-05, + "loss": 0.882, + "step": 8150 + }, + { + "epoch": 0.44, + "learning_rate": 4.743651104473361e-05, + "loss": 0.735, + "step": 8155 + }, + { + "epoch": 0.44, + "learning_rate": 4.743342118559184e-05, + "loss": 0.6242, + "step": 8160 + }, + { + "epoch": 0.44, + "learning_rate": 4.7430329566172284e-05, + "loss": 0.7974, + "step": 8165 + }, + { + "epoch": 0.44, + "learning_rate": 4.742723618671754e-05, + "loss": 0.7568, + "step": 8170 + }, + { + "epoch": 0.44, + "learning_rate": 4.7424141047470326e-05, + "loss": 0.8233, + "step": 8175 + }, + { + "epoch": 0.44, + "learning_rate": 4.742104414867352e-05, + "loss": 0.7301, + "step": 8180 + }, + { + "epoch": 0.44, + "learning_rate": 4.741794549057012e-05, + "loss": 0.7417, + "step": 8185 + }, + { + "epoch": 0.44, + "learning_rate": 4.741484507340326e-05, + "loss": 0.8588, + "step": 8190 + }, + { + "epoch": 0.44, + "learning_rate": 4.741174289741622e-05, + "loss": 0.633, + "step": 8195 + }, + { + "epoch": 0.44, + "learning_rate": 4.740863896285243e-05, + "loss": 0.7923, + "step": 8200 + }, + { + "epoch": 0.44, + "learning_rate": 4.740553326995544e-05, + "loss": 0.8328, + "step": 8205 + }, + { + "epoch": 0.44, + "learning_rate": 4.740242581896894e-05, + "loss": 0.6267, + "step": 8210 + }, + { + "epoch": 0.44, + "learning_rate": 4.739931661013676e-05, + "loss": 0.8507, + "step": 8215 + }, + { + "epoch": 0.44, + "learning_rate": 4.739620564370288e-05, + "loss": 0.8477, + "step": 8220 + }, + { + "epoch": 0.44, + "learning_rate": 4.7393092919911394e-05, + "loss": 0.8198, + "step": 8225 + }, + { + "epoch": 0.44, + "learning_rate": 4.738997843900656e-05, + "loss": 0.7434, + "step": 8230 + }, + { + "epoch": 0.44, + "learning_rate": 4.738686220123276e-05, + "loss": 0.8423, + "step": 8235 + }, + { + "epoch": 0.44, + "learning_rate": 4.738374420683451e-05, + "loss": 0.7117, + "step": 8240 + }, + { + "epoch": 0.44, + "learning_rate": 4.738062445605646e-05, + "loss": 0.7681, + "step": 8245 + }, + { + "epoch": 0.44, + "learning_rate": 4.7377502949143423e-05, + "loss": 0.7729, + "step": 8250 + }, + { + "epoch": 0.44, + "learning_rate": 4.737437968634033e-05, + "loss": 0.8329, + "step": 8255 + }, + { + "epoch": 0.44, + "learning_rate": 4.737125466789224e-05, + "loss": 0.8426, + "step": 8260 + }, + { + "epoch": 0.44, + "learning_rate": 4.736812789404438e-05, + "loss": 0.6936, + "step": 8265 + }, + { + "epoch": 0.44, + "learning_rate": 4.736499936504209e-05, + "loss": 0.763, + "step": 8270 + }, + { + "epoch": 0.44, + "learning_rate": 4.736186908113086e-05, + "loss": 0.9509, + "step": 8275 + }, + { + "epoch": 0.44, + "learning_rate": 4.73587370425563e-05, + "loss": 0.823, + "step": 8280 + }, + { + "epoch": 0.44, + "learning_rate": 4.735560324956419e-05, + "loss": 0.9261, + "step": 8285 + }, + { + "epoch": 0.44, + "learning_rate": 4.735246770240042e-05, + "loss": 0.7226, + "step": 8290 + }, + { + "epoch": 0.44, + "learning_rate": 4.734933040131102e-05, + "loss": 0.8563, + "step": 8295 + }, + { + "epoch": 0.44, + "learning_rate": 4.734619134654217e-05, + "loss": 0.8009, + "step": 8300 + }, + { + "epoch": 0.44, + "learning_rate": 4.7343050538340186e-05, + "loss": 0.8793, + "step": 8305 + }, + { + "epoch": 0.44, + "learning_rate": 4.73399079769515e-05, + "loss": 0.8894, + "step": 8310 + }, + { + "epoch": 0.44, + "learning_rate": 4.733676366262272e-05, + "loss": 0.8194, + "step": 8315 + }, + { + "epoch": 0.45, + "learning_rate": 4.733361759560055e-05, + "loss": 0.8656, + "step": 8320 + }, + { + "epoch": 0.45, + "learning_rate": 4.7330469776131884e-05, + "loss": 0.85, + "step": 8325 + }, + { + "epoch": 0.45, + "learning_rate": 4.7327320204463675e-05, + "loss": 0.8959, + "step": 8330 + }, + { + "epoch": 0.45, + "learning_rate": 4.73241688808431e-05, + "loss": 0.8641, + "step": 8335 + }, + { + "epoch": 0.45, + "learning_rate": 4.732101580551741e-05, + "loss": 0.7719, + "step": 8340 + }, + { + "epoch": 0.45, + "learning_rate": 4.731786097873403e-05, + "loss": 0.8282, + "step": 8345 + }, + { + "epoch": 0.45, + "learning_rate": 4.73147044007405e-05, + "loss": 0.8455, + "step": 8350 + }, + { + "epoch": 0.45, + "learning_rate": 4.7311546071784506e-05, + "loss": 0.7587, + "step": 8355 + }, + { + "epoch": 0.45, + "learning_rate": 4.730838599211388e-05, + "loss": 0.7136, + "step": 8360 + }, + { + "epoch": 0.45, + "learning_rate": 4.730522416197657e-05, + "loss": 0.79, + "step": 8365 + }, + { + "epoch": 0.45, + "learning_rate": 4.730206058162069e-05, + "loss": 0.8731, + "step": 8370 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298895251294464e-05, + "loss": 0.817, + "step": 8375 + }, + { + "epoch": 0.45, + "learning_rate": 4.729572817124627e-05, + "loss": 0.8334, + "step": 8380 + }, + { + "epoch": 0.45, + "learning_rate": 4.729255934172462e-05, + "loss": 0.7309, + "step": 8385 + }, + { + "epoch": 0.45, + "learning_rate": 4.728938876297816e-05, + "loss": 0.7559, + "step": 8390 + }, + { + "epoch": 0.45, + "learning_rate": 4.7286216435255685e-05, + "loss": 0.7146, + "step": 8395 + }, + { + "epoch": 0.45, + "learning_rate": 4.728304235880609e-05, + "loss": 0.8805, + "step": 8400 + }, + { + "epoch": 0.45, + "learning_rate": 4.727986653387846e-05, + "loss": 0.9933, + "step": 8405 + }, + { + "epoch": 0.45, + "learning_rate": 4.727668896072199e-05, + "loss": 0.6786, + "step": 8410 + }, + { + "epoch": 0.45, + "learning_rate": 4.7273509639586e-05, + "loss": 0.8467, + "step": 8415 + }, + { + "epoch": 0.45, + "learning_rate": 4.727032857071997e-05, + "loss": 0.8853, + "step": 8420 + }, + { + "epoch": 0.45, + "learning_rate": 4.72671457543735e-05, + "loss": 0.8622, + "step": 8425 + }, + { + "epoch": 0.45, + "learning_rate": 4.7263961190796353e-05, + "loss": 0.6195, + "step": 8430 + }, + { + "epoch": 0.45, + "learning_rate": 4.7260774880238396e-05, + "loss": 0.8503, + "step": 8435 + }, + { + "epoch": 0.45, + "learning_rate": 4.7257586822949654e-05, + "loss": 0.7259, + "step": 8440 + }, + { + "epoch": 0.45, + "learning_rate": 4.725439701918028e-05, + "loss": 0.737, + "step": 8445 + }, + { + "epoch": 0.45, + "learning_rate": 4.725120546918057e-05, + "loss": 0.8469, + "step": 8450 + }, + { + "epoch": 0.45, + "learning_rate": 4.724801217320095e-05, + "loss": 0.7303, + "step": 8455 + }, + { + "epoch": 0.45, + "learning_rate": 4.7244817131492004e-05, + "loss": 0.7396, + "step": 8460 + }, + { + "epoch": 0.45, + "learning_rate": 4.724162034430442e-05, + "loss": 0.946, + "step": 8465 + }, + { + "epoch": 0.45, + "learning_rate": 4.723842181188903e-05, + "loss": 0.7625, + "step": 8470 + }, + { + "epoch": 0.45, + "learning_rate": 4.723522153449684e-05, + "loss": 0.8024, + "step": 8475 + }, + { + "epoch": 0.45, + "learning_rate": 4.7232019512378954e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.45, + "learning_rate": 4.7228815745786616e-05, + "loss": 0.6964, + "step": 8485 + }, + { + "epoch": 0.45, + "learning_rate": 4.722561023497123e-05, + "loss": 0.8487, + "step": 8490 + }, + { + "epoch": 0.45, + "learning_rate": 4.72224029801843e-05, + "loss": 0.8217, + "step": 8495 + }, + { + "epoch": 0.45, + "learning_rate": 4.721919398167751e-05, + "loss": 0.8176, + "step": 8500 + }, + { + "epoch": 0.45, + "learning_rate": 4.721598323970265e-05, + "loss": 0.7314, + "step": 8505 + }, + { + "epoch": 0.46, + "learning_rate": 4.721277075451166e-05, + "loss": 0.9069, + "step": 8510 + }, + { + "epoch": 0.46, + "learning_rate": 4.7209556526356616e-05, + "loss": 0.8434, + "step": 8515 + }, + { + "epoch": 0.46, + "learning_rate": 4.7206340555489714e-05, + "loss": 0.6144, + "step": 8520 + }, + { + "epoch": 0.46, + "learning_rate": 4.7203122842163316e-05, + "loss": 0.8412, + "step": 8525 + }, + { + "epoch": 0.46, + "learning_rate": 4.719990338662989e-05, + "loss": 0.6959, + "step": 8530 + }, + { + "epoch": 0.46, + "learning_rate": 4.7196682189142083e-05, + "loss": 0.6124, + "step": 8535 + }, + { + "epoch": 0.46, + "learning_rate": 4.719345924995263e-05, + "loss": 0.8663, + "step": 8540 + }, + { + "epoch": 0.46, + "learning_rate": 4.7190234569314426e-05, + "loss": 0.8179, + "step": 8545 + }, + { + "epoch": 0.46, + "learning_rate": 4.718700814748051e-05, + "loss": 0.7653, + "step": 8550 + }, + { + "epoch": 0.46, + "learning_rate": 4.7183779984704034e-05, + "loss": 0.8646, + "step": 8555 + }, + { + "epoch": 0.46, + "learning_rate": 4.718055008123832e-05, + "loss": 0.7132, + "step": 8560 + }, + { + "epoch": 0.46, + "learning_rate": 4.71773184373368e-05, + "loss": 0.8337, + "step": 8565 + }, + { + "epoch": 0.46, + "learning_rate": 4.717408505325305e-05, + "loss": 0.6969, + "step": 8570 + }, + { + "epoch": 0.46, + "learning_rate": 4.717084992924078e-05, + "loss": 0.7786, + "step": 8575 + }, + { + "epoch": 0.46, + "learning_rate": 4.716761306555384e-05, + "loss": 0.863, + "step": 8580 + }, + { + "epoch": 0.46, + "learning_rate": 4.7164374462446224e-05, + "loss": 0.8722, + "step": 8585 + }, + { + "epoch": 0.46, + "learning_rate": 4.7161134120172045e-05, + "loss": 0.8318, + "step": 8590 + }, + { + "epoch": 0.46, + "learning_rate": 4.7157892038985575e-05, + "loss": 0.7558, + "step": 8595 + }, + { + "epoch": 0.46, + "learning_rate": 4.7154648219141195e-05, + "loss": 0.7415, + "step": 8600 + }, + { + "epoch": 0.46, + "learning_rate": 4.715140266089345e-05, + "loss": 0.645, + "step": 8605 + }, + { + "epoch": 0.46, + "learning_rate": 4.7148155364496994e-05, + "loss": 0.8102, + "step": 8610 + }, + { + "epoch": 0.46, + "learning_rate": 4.714490633020664e-05, + "loss": 0.8066, + "step": 8615 + }, + { + "epoch": 0.46, + "learning_rate": 4.7141655558277335e-05, + "loss": 0.8735, + "step": 8620 + }, + { + "epoch": 0.46, + "learning_rate": 4.713840304896415e-05, + "loss": 0.7703, + "step": 8625 + }, + { + "epoch": 0.46, + "learning_rate": 4.7135148802522294e-05, + "loss": 0.7764, + "step": 8630 + }, + { + "epoch": 0.46, + "learning_rate": 4.713189281920712e-05, + "loss": 0.771, + "step": 8635 + }, + { + "epoch": 0.46, + "learning_rate": 4.712863509927413e-05, + "loss": 0.6983, + "step": 8640 + }, + { + "epoch": 0.46, + "learning_rate": 4.712537564297892e-05, + "loss": 0.6602, + "step": 8645 + }, + { + "epoch": 0.46, + "learning_rate": 4.712211445057727e-05, + "loss": 0.855, + "step": 8650 + }, + { + "epoch": 0.46, + "learning_rate": 4.711885152232507e-05, + "loss": 0.8558, + "step": 8655 + }, + { + "epoch": 0.46, + "learning_rate": 4.711558685847834e-05, + "loss": 0.838, + "step": 8660 + }, + { + "epoch": 0.46, + "learning_rate": 4.711232045929327e-05, + "loss": 0.7582, + "step": 8665 + }, + { + "epoch": 0.46, + "learning_rate": 4.710905232502614e-05, + "loss": 0.7527, + "step": 8670 + }, + { + "epoch": 0.46, + "learning_rate": 4.71057824559334e-05, + "loss": 0.7696, + "step": 8675 + }, + { + "epoch": 0.46, + "learning_rate": 4.710251085227163e-05, + "loss": 0.7761, + "step": 8680 + }, + { + "epoch": 0.46, + "learning_rate": 4.709923751429755e-05, + "loss": 0.8355, + "step": 8685 + }, + { + "epoch": 0.46, + "learning_rate": 4.7095962442267974e-05, + "loss": 0.7672, + "step": 8690 + }, + { + "epoch": 0.47, + "learning_rate": 4.7092685636439926e-05, + "loss": 0.8688, + "step": 8695 + }, + { + "epoch": 0.47, + "learning_rate": 4.708940709707051e-05, + "loss": 0.6361, + "step": 8700 + }, + { + "epoch": 0.47, + "learning_rate": 4.7086126824416965e-05, + "loss": 0.7768, + "step": 8705 + }, + { + "epoch": 0.47, + "learning_rate": 4.708284481873672e-05, + "loss": 0.7763, + "step": 8710 + }, + { + "epoch": 0.47, + "learning_rate": 4.7079561080287274e-05, + "loss": 0.7752, + "step": 8715 + }, + { + "epoch": 0.47, + "learning_rate": 4.707627560932629e-05, + "loss": 0.8171, + "step": 8720 + }, + { + "epoch": 0.47, + "learning_rate": 4.7072988406111595e-05, + "loss": 0.8556, + "step": 8725 + }, + { + "epoch": 0.47, + "learning_rate": 4.70696994709011e-05, + "loss": 0.6163, + "step": 8730 + }, + { + "epoch": 0.47, + "learning_rate": 4.706640880395289e-05, + "loss": 0.8174, + "step": 8735 + }, + { + "epoch": 0.47, + "learning_rate": 4.706311640552517e-05, + "loss": 0.7525, + "step": 8740 + }, + { + "epoch": 0.47, + "learning_rate": 4.7059822275876286e-05, + "loss": 0.7923, + "step": 8745 + }, + { + "epoch": 0.47, + "learning_rate": 4.705652641526471e-05, + "loss": 0.7434, + "step": 8750 + }, + { + "epoch": 0.47, + "learning_rate": 4.705322882394906e-05, + "loss": 0.802, + "step": 8755 + }, + { + "epoch": 0.47, + "learning_rate": 4.704992950218809e-05, + "loss": 0.8143, + "step": 8760 + }, + { + "epoch": 0.47, + "learning_rate": 4.7046628450240696e-05, + "loss": 0.8047, + "step": 8765 + }, + { + "epoch": 0.47, + "learning_rate": 4.704332566836588e-05, + "loss": 0.6798, + "step": 8770 + }, + { + "epoch": 0.47, + "learning_rate": 4.704002115682281e-05, + "loss": 0.6715, + "step": 8775 + }, + { + "epoch": 0.47, + "learning_rate": 4.7036714915870795e-05, + "loss": 0.6971, + "step": 8780 + }, + { + "epoch": 0.47, + "learning_rate": 4.7033406945769246e-05, + "loss": 0.8731, + "step": 8785 + }, + { + "epoch": 0.47, + "learning_rate": 4.703009724677773e-05, + "loss": 0.6715, + "step": 8790 + }, + { + "epoch": 0.47, + "learning_rate": 4.702678581915596e-05, + "loss": 0.8188, + "step": 8795 + }, + { + "epoch": 0.47, + "learning_rate": 4.702347266316376e-05, + "loss": 0.6578, + "step": 8800 + }, + { + "epoch": 0.47, + "learning_rate": 4.702015777906111e-05, + "loss": 0.7778, + "step": 8805 + }, + { + "epoch": 0.47, + "learning_rate": 4.701684116710813e-05, + "loss": 0.7524, + "step": 8810 + }, + { + "epoch": 0.47, + "learning_rate": 4.701352282756504e-05, + "loss": 0.9254, + "step": 8815 + }, + { + "epoch": 0.47, + "learning_rate": 4.701020276069222e-05, + "loss": 0.8136, + "step": 8820 + }, + { + "epoch": 0.47, + "learning_rate": 4.700688096675021e-05, + "loss": 0.8761, + "step": 8825 + }, + { + "epoch": 0.47, + "learning_rate": 4.700355744599965e-05, + "loss": 0.7901, + "step": 8830 + }, + { + "epoch": 0.47, + "learning_rate": 4.700023219870131e-05, + "loss": 0.8181, + "step": 8835 + }, + { + "epoch": 0.47, + "learning_rate": 4.699690522511612e-05, + "loss": 0.7215, + "step": 8840 + }, + { + "epoch": 0.47, + "learning_rate": 4.699357652550515e-05, + "loss": 0.6972, + "step": 8845 + }, + { + "epoch": 0.47, + "learning_rate": 4.6990246100129576e-05, + "loss": 0.6351, + "step": 8850 + }, + { + "epoch": 0.47, + "learning_rate": 4.698691394925073e-05, + "loss": 0.7755, + "step": 8855 + }, + { + "epoch": 0.47, + "learning_rate": 4.698358007313008e-05, + "loss": 0.7657, + "step": 8860 + }, + { + "epoch": 0.47, + "learning_rate": 4.698024447202922e-05, + "loss": 0.7957, + "step": 8865 + }, + { + "epoch": 0.47, + "learning_rate": 4.697690714620988e-05, + "loss": 0.8218, + "step": 8870 + }, + { + "epoch": 0.47, + "learning_rate": 4.697356809593394e-05, + "loss": 0.8059, + "step": 8875 + }, + { + "epoch": 0.48, + "learning_rate": 4.6970227321463396e-05, + "loss": 0.8861, + "step": 8880 + }, + { + "epoch": 0.48, + "learning_rate": 4.696688482306039e-05, + "loss": 0.6951, + "step": 8885 + }, + { + "epoch": 0.48, + "learning_rate": 4.69635406009872e-05, + "loss": 0.7081, + "step": 8890 + }, + { + "epoch": 0.48, + "learning_rate": 4.696019465550623e-05, + "loss": 0.7778, + "step": 8895 + }, + { + "epoch": 0.48, + "learning_rate": 4.6956846986880035e-05, + "loss": 0.854, + "step": 8900 + }, + { + "epoch": 0.48, + "learning_rate": 4.6953497595371275e-05, + "loss": 0.7726, + "step": 8905 + }, + { + "epoch": 0.48, + "learning_rate": 4.6950146481242794e-05, + "loss": 0.7738, + "step": 8910 + }, + { + "epoch": 0.48, + "learning_rate": 4.6946793644757526e-05, + "loss": 0.7351, + "step": 8915 + }, + { + "epoch": 0.48, + "learning_rate": 4.694343908617855e-05, + "loss": 0.7159, + "step": 8920 + }, + { + "epoch": 0.48, + "learning_rate": 4.694008280576911e-05, + "loss": 0.8371, + "step": 8925 + }, + { + "epoch": 0.48, + "learning_rate": 4.693672480379255e-05, + "loss": 0.7943, + "step": 8930 + }, + { + "epoch": 0.48, + "learning_rate": 4.693336508051236e-05, + "loss": 0.7223, + "step": 8935 + }, + { + "epoch": 0.48, + "learning_rate": 4.693000363619218e-05, + "loss": 0.6927, + "step": 8940 + }, + { + "epoch": 0.48, + "learning_rate": 4.692664047109574e-05, + "loss": 0.824, + "step": 8945 + }, + { + "epoch": 0.48, + "learning_rate": 4.692327558548697e-05, + "loss": 0.8323, + "step": 8950 + }, + { + "epoch": 0.48, + "learning_rate": 4.6919908979629877e-05, + "loss": 0.7352, + "step": 8955 + }, + { + "epoch": 0.48, + "learning_rate": 4.691654065378865e-05, + "loss": 0.7701, + "step": 8960 + }, + { + "epoch": 0.48, + "learning_rate": 4.6913170608227574e-05, + "loss": 0.8181, + "step": 8965 + }, + { + "epoch": 0.48, + "learning_rate": 4.69097988432111e-05, + "loss": 0.774, + "step": 8970 + }, + { + "epoch": 0.48, + "learning_rate": 4.690642535900378e-05, + "loss": 0.7599, + "step": 8975 + }, + { + "epoch": 0.48, + "learning_rate": 4.690305015587033e-05, + "loss": 0.7864, + "step": 8980 + }, + { + "epoch": 0.48, + "learning_rate": 4.6899673234075604e-05, + "loss": 0.9758, + "step": 8985 + }, + { + "epoch": 0.48, + "learning_rate": 4.689629459388456e-05, + "loss": 0.7617, + "step": 8990 + }, + { + "epoch": 0.48, + "learning_rate": 4.6892914235562313e-05, + "loss": 0.7715, + "step": 8995 + }, + { + "epoch": 0.48, + "learning_rate": 4.6889532159374114e-05, + "loss": 0.8653, + "step": 9000 + }, + { + "epoch": 0.48, + "learning_rate": 4.688614836558534e-05, + "loss": 0.7397, + "step": 9005 + }, + { + "epoch": 0.48, + "learning_rate": 4.68827628544615e-05, + "loss": 0.8779, + "step": 9010 + }, + { + "epoch": 0.48, + "learning_rate": 4.6879375626268265e-05, + "loss": 0.6842, + "step": 9015 + }, + { + "epoch": 0.48, + "learning_rate": 4.6875986681271396e-05, + "loss": 0.7844, + "step": 9020 + }, + { + "epoch": 0.48, + "learning_rate": 4.687259601973683e-05, + "loss": 0.8776, + "step": 9025 + }, + { + "epoch": 0.48, + "learning_rate": 4.6869203641930604e-05, + "loss": 0.8472, + "step": 9030 + }, + { + "epoch": 0.48, + "learning_rate": 4.686580954811892e-05, + "loss": 0.7991, + "step": 9035 + }, + { + "epoch": 0.48, + "learning_rate": 4.68624137385681e-05, + "loss": 0.8891, + "step": 9040 + }, + { + "epoch": 0.48, + "learning_rate": 4.68590162135446e-05, + "loss": 0.8141, + "step": 9045 + }, + { + "epoch": 0.48, + "learning_rate": 4.6855616973315005e-05, + "loss": 0.7931, + "step": 9050 + }, + { + "epoch": 0.48, + "learning_rate": 4.6852216018146064e-05, + "loss": 0.8202, + "step": 9055 + }, + { + "epoch": 0.48, + "learning_rate": 4.684881334830462e-05, + "loss": 0.6925, + "step": 9060 + }, + { + "epoch": 0.48, + "learning_rate": 4.684540896405767e-05, + "loss": 0.7158, + "step": 9065 + }, + { + "epoch": 0.49, + "learning_rate": 4.684200286567235e-05, + "loss": 0.7355, + "step": 9070 + }, + { + "epoch": 0.49, + "learning_rate": 4.6838595053415926e-05, + "loss": 0.7648, + "step": 9075 + }, + { + "epoch": 0.49, + "learning_rate": 4.68351855275558e-05, + "loss": 0.821, + "step": 9080 + }, + { + "epoch": 0.49, + "learning_rate": 4.683177428835951e-05, + "loss": 0.8214, + "step": 9085 + }, + { + "epoch": 0.49, + "learning_rate": 4.6828361336094705e-05, + "loss": 0.6237, + "step": 9090 + }, + { + "epoch": 0.49, + "learning_rate": 4.682494667102921e-05, + "loss": 0.9444, + "step": 9095 + }, + { + "epoch": 0.49, + "learning_rate": 4.682153029343095e-05, + "loss": 0.7413, + "step": 9100 + }, + { + "epoch": 0.49, + "learning_rate": 4.6818112203568e-05, + "loss": 0.8892, + "step": 9105 + }, + { + "epoch": 0.49, + "learning_rate": 4.681469240170857e-05, + "loss": 0.8794, + "step": 9110 + }, + { + "epoch": 0.49, + "learning_rate": 4.6811270888121006e-05, + "loss": 0.756, + "step": 9115 + }, + { + "epoch": 0.49, + "learning_rate": 4.680784766307377e-05, + "loss": 0.7525, + "step": 9120 + }, + { + "epoch": 0.49, + "learning_rate": 4.680442272683547e-05, + "loss": 0.9392, + "step": 9125 + }, + { + "epoch": 0.49, + "learning_rate": 4.680099607967487e-05, + "loss": 0.7322, + "step": 9130 + }, + { + "epoch": 0.49, + "learning_rate": 4.679756772186083e-05, + "loss": 0.8301, + "step": 9135 + }, + { + "epoch": 0.49, + "learning_rate": 4.679413765366236e-05, + "loss": 0.724, + "step": 9140 + }, + { + "epoch": 0.49, + "learning_rate": 4.6790705875348614e-05, + "loss": 0.8354, + "step": 9145 + }, + { + "epoch": 0.49, + "learning_rate": 4.678727238718888e-05, + "loss": 0.8303, + "step": 9150 + }, + { + "epoch": 0.49, + "learning_rate": 4.6783837189452565e-05, + "loss": 0.9175, + "step": 9155 + }, + { + "epoch": 0.49, + "learning_rate": 4.678040028240921e-05, + "loss": 0.8232, + "step": 9160 + }, + { + "epoch": 0.49, + "learning_rate": 4.67769616663285e-05, + "loss": 0.8086, + "step": 9165 + }, + { + "epoch": 0.49, + "learning_rate": 4.677352134148026e-05, + "loss": 0.9872, + "step": 9170 + }, + { + "epoch": 0.49, + "learning_rate": 4.677007930813445e-05, + "loss": 0.6335, + "step": 9175 + }, + { + "epoch": 0.49, + "learning_rate": 4.676663556656114e-05, + "loss": 0.8807, + "step": 9180 + }, + { + "epoch": 0.49, + "learning_rate": 4.6763190117030543e-05, + "loss": 0.7734, + "step": 9185 + }, + { + "epoch": 0.49, + "learning_rate": 4.6759742959813027e-05, + "loss": 0.8999, + "step": 9190 + }, + { + "epoch": 0.49, + "learning_rate": 4.675629409517907e-05, + "loss": 0.8552, + "step": 9195 + }, + { + "epoch": 0.49, + "learning_rate": 4.6752843523399305e-05, + "loss": 0.6333, + "step": 9200 + }, + { + "epoch": 0.49, + "learning_rate": 4.674939124474447e-05, + "loss": 0.7171, + "step": 9205 + }, + { + "epoch": 0.49, + "learning_rate": 4.674593725948547e-05, + "loss": 0.8502, + "step": 9210 + }, + { + "epoch": 0.49, + "learning_rate": 4.6742481567893324e-05, + "loss": 0.7937, + "step": 9215 + }, + { + "epoch": 0.49, + "learning_rate": 4.6739024170239175e-05, + "loss": 0.7489, + "step": 9220 + }, + { + "epoch": 0.49, + "learning_rate": 4.673556506679434e-05, + "loss": 0.8656, + "step": 9225 + }, + { + "epoch": 0.49, + "learning_rate": 4.6732104257830226e-05, + "loss": 0.9049, + "step": 9230 + }, + { + "epoch": 0.49, + "learning_rate": 4.672864174361839e-05, + "loss": 0.8415, + "step": 9235 + }, + { + "epoch": 0.49, + "learning_rate": 4.6725177524430524e-05, + "loss": 0.6675, + "step": 9240 + }, + { + "epoch": 0.49, + "learning_rate": 4.6721711600538466e-05, + "loss": 0.8559, + "step": 9245 + }, + { + "epoch": 0.49, + "learning_rate": 4.671824397221416e-05, + "loss": 0.6655, + "step": 9250 + }, + { + "epoch": 0.5, + "learning_rate": 4.671477463972972e-05, + "loss": 0.8897, + "step": 9255 + }, + { + "epoch": 0.5, + "learning_rate": 4.671130360335735e-05, + "loss": 0.6493, + "step": 9260 + }, + { + "epoch": 0.5, + "learning_rate": 4.670783086336943e-05, + "loss": 0.8354, + "step": 9265 + }, + { + "epoch": 0.5, + "learning_rate": 4.670435642003844e-05, + "loss": 0.8187, + "step": 9270 + }, + { + "epoch": 0.5, + "learning_rate": 4.670088027363703e-05, + "loss": 0.8215, + "step": 9275 + }, + { + "epoch": 0.5, + "learning_rate": 4.6697402424437934e-05, + "loss": 0.7948, + "step": 9280 + }, + { + "epoch": 0.5, + "learning_rate": 4.6693922872714055e-05, + "loss": 0.8136, + "step": 9285 + }, + { + "epoch": 0.5, + "learning_rate": 4.669044161873845e-05, + "loss": 0.7081, + "step": 9290 + }, + { + "epoch": 0.5, + "learning_rate": 4.668695866278424e-05, + "loss": 0.7798, + "step": 9295 + }, + { + "epoch": 0.5, + "learning_rate": 4.668347400512474e-05, + "loss": 0.8168, + "step": 9300 + }, + { + "epoch": 0.5, + "learning_rate": 4.667998764603339e-05, + "loss": 0.843, + "step": 9305 + }, + { + "epoch": 0.5, + "learning_rate": 4.667649958578374e-05, + "loss": 0.7505, + "step": 9310 + }, + { + "epoch": 0.5, + "learning_rate": 4.6673009824649495e-05, + "loss": 0.7307, + "step": 9315 + }, + { + "epoch": 0.5, + "learning_rate": 4.666951836290448e-05, + "loss": 0.867, + "step": 9320 + }, + { + "epoch": 0.5, + "learning_rate": 4.666602520082265e-05, + "loss": 0.8448, + "step": 9325 + }, + { + "epoch": 0.5, + "learning_rate": 4.6662530338678126e-05, + "loss": 0.8344, + "step": 9330 + }, + { + "epoch": 0.5, + "learning_rate": 4.665903377674511e-05, + "loss": 0.9449, + "step": 9335 + }, + { + "epoch": 0.5, + "learning_rate": 4.6655535515297985e-05, + "loss": 0.6916, + "step": 9340 + }, + { + "epoch": 0.5, + "learning_rate": 4.6652035554611243e-05, + "loss": 0.866, + "step": 9345 + }, + { + "epoch": 0.5, + "learning_rate": 4.664853389495952e-05, + "loss": 0.9566, + "step": 9350 + }, + { + "epoch": 0.5, + "learning_rate": 4.6645030536617565e-05, + "loss": 0.7587, + "step": 9355 + }, + { + "epoch": 0.5, + "learning_rate": 4.664152547986029e-05, + "loss": 0.8226, + "step": 9360 + }, + { + "epoch": 0.5, + "learning_rate": 4.663801872496273e-05, + "loss": 0.8038, + "step": 9365 + }, + { + "epoch": 0.5, + "learning_rate": 4.6634510272200024e-05, + "loss": 0.8376, + "step": 9370 + }, + { + "epoch": 0.5, + "learning_rate": 4.663100012184749e-05, + "loss": 0.7771, + "step": 9375 + }, + { + "epoch": 0.5, + "learning_rate": 4.6627488274180554e-05, + "loss": 0.769, + "step": 9380 + }, + { + "epoch": 0.5, + "learning_rate": 4.662397472947477e-05, + "loss": 0.846, + "step": 9385 + }, + { + "epoch": 0.5, + "learning_rate": 4.662045948800585e-05, + "loss": 0.7619, + "step": 9390 + }, + { + "epoch": 0.5, + "learning_rate": 4.661694255004961e-05, + "loss": 0.7534, + "step": 9395 + }, + { + "epoch": 0.5, + "learning_rate": 4.6613423915882014e-05, + "loss": 0.805, + "step": 9400 + }, + { + "epoch": 0.5, + "learning_rate": 4.660990358577917e-05, + "loss": 0.9179, + "step": 9405 + }, + { + "epoch": 0.5, + "learning_rate": 4.66063815600173e-05, + "loss": 0.711, + "step": 9410 + }, + { + "epoch": 0.5, + "learning_rate": 4.660285783887276e-05, + "loss": 0.9107, + "step": 9415 + }, + { + "epoch": 0.5, + "learning_rate": 4.659933242262204e-05, + "loss": 0.666, + "step": 9420 + }, + { + "epoch": 0.5, + "learning_rate": 4.6595805311541793e-05, + "loss": 0.7529, + "step": 9425 + }, + { + "epoch": 0.5, + "learning_rate": 4.659227650590876e-05, + "loss": 0.8221, + "step": 9430 + }, + { + "epoch": 0.5, + "learning_rate": 4.658874600599984e-05, + "loss": 0.7809, + "step": 9435 + }, + { + "epoch": 0.51, + "learning_rate": 4.658521381209206e-05, + "loss": 0.749, + "step": 9440 + }, + { + "epoch": 0.51, + "learning_rate": 4.658167992446257e-05, + "loss": 0.636, + "step": 9445 + }, + { + "epoch": 0.51, + "learning_rate": 4.6578144343388676e-05, + "loss": 0.7896, + "step": 9450 + }, + { + "epoch": 0.51, + "learning_rate": 4.6574607069147804e-05, + "loss": 0.884, + "step": 9455 + }, + { + "epoch": 0.51, + "learning_rate": 4.65710681020175e-05, + "loss": 0.9028, + "step": 9460 + }, + { + "epoch": 0.51, + "learning_rate": 4.656752744227547e-05, + "loss": 0.7473, + "step": 9465 + }, + { + "epoch": 0.51, + "learning_rate": 4.656398509019954e-05, + "loss": 0.6627, + "step": 9470 + }, + { + "epoch": 0.51, + "learning_rate": 4.6560441046067636e-05, + "loss": 0.7747, + "step": 9475 + }, + { + "epoch": 0.51, + "learning_rate": 4.6556895310157886e-05, + "loss": 0.7177, + "step": 9480 + }, + { + "epoch": 0.51, + "learning_rate": 4.655334788274849e-05, + "loss": 0.5813, + "step": 9485 + }, + { + "epoch": 0.51, + "learning_rate": 4.6549798764117814e-05, + "loss": 0.7672, + "step": 9490 + }, + { + "epoch": 0.51, + "learning_rate": 4.654624795454433e-05, + "loss": 0.8411, + "step": 9495 + }, + { + "epoch": 0.51, + "learning_rate": 4.654269545430668e-05, + "loss": 0.802, + "step": 9500 + }, + { + "epoch": 0.51, + "learning_rate": 4.653914126368361e-05, + "loss": 0.8177, + "step": 9505 + }, + { + "epoch": 0.51, + "learning_rate": 4.653558538295399e-05, + "loss": 0.9998, + "step": 9510 + }, + { + "epoch": 0.51, + "learning_rate": 4.6532027812396864e-05, + "loss": 0.765, + "step": 9515 + }, + { + "epoch": 0.51, + "learning_rate": 4.652846855229136e-05, + "loss": 0.8213, + "step": 9520 + }, + { + "epoch": 0.51, + "learning_rate": 4.6524907602916776e-05, + "loss": 0.7947, + "step": 9525 + }, + { + "epoch": 0.51, + "learning_rate": 4.652134496455251e-05, + "loss": 0.8174, + "step": 9530 + }, + { + "epoch": 0.51, + "learning_rate": 4.651778063747815e-05, + "loss": 0.7767, + "step": 9535 + }, + { + "epoch": 0.51, + "learning_rate": 4.651421462197333e-05, + "loss": 0.8102, + "step": 9540 + }, + { + "epoch": 0.51, + "learning_rate": 4.65106469183179e-05, + "loss": 0.7869, + "step": 9545 + }, + { + "epoch": 0.51, + "learning_rate": 4.650707752679178e-05, + "loss": 0.8698, + "step": 9550 + }, + { + "epoch": 0.51, + "learning_rate": 4.650350644767507e-05, + "loss": 0.7939, + "step": 9555 + }, + { + "epoch": 0.51, + "learning_rate": 4.649993368124797e-05, + "loss": 0.8774, + "step": 9560 + }, + { + "epoch": 0.51, + "learning_rate": 4.649635922779081e-05, + "loss": 0.9838, + "step": 9565 + }, + { + "epoch": 0.51, + "learning_rate": 4.649278308758409e-05, + "loss": 0.8833, + "step": 9570 + }, + { + "epoch": 0.51, + "learning_rate": 4.648920526090841e-05, + "loss": 0.763, + "step": 9575 + }, + { + "epoch": 0.51, + "learning_rate": 4.6485625748044506e-05, + "loss": 0.7466, + "step": 9580 + }, + { + "epoch": 0.51, + "learning_rate": 4.648204454927325e-05, + "loss": 0.759, + "step": 9585 + }, + { + "epoch": 0.51, + "learning_rate": 4.647846166487566e-05, + "loss": 0.6762, + "step": 9590 + }, + { + "epoch": 0.51, + "learning_rate": 4.6474877095132854e-05, + "loss": 0.7819, + "step": 9595 + }, + { + "epoch": 0.51, + "learning_rate": 4.647129084032612e-05, + "loss": 0.7402, + "step": 9600 + }, + { + "epoch": 0.51, + "learning_rate": 4.646770290073684e-05, + "loss": 0.8402, + "step": 9605 + }, + { + "epoch": 0.51, + "learning_rate": 4.646411327664657e-05, + "loss": 0.913, + "step": 9610 + }, + { + "epoch": 0.51, + "learning_rate": 4.646052196833696e-05, + "loss": 0.8166, + "step": 9615 + }, + { + "epoch": 0.51, + "learning_rate": 4.645692897608982e-05, + "loss": 0.8481, + "step": 9620 + }, + { + "epoch": 0.51, + "learning_rate": 4.645333430018707e-05, + "loss": 0.7684, + "step": 9625 + }, + { + "epoch": 0.52, + "learning_rate": 4.6449737940910766e-05, + "loss": 0.6542, + "step": 9630 + }, + { + "epoch": 0.52, + "learning_rate": 4.6446139898543124e-05, + "loss": 0.9037, + "step": 9635 + }, + { + "epoch": 0.52, + "learning_rate": 4.644254017336645e-05, + "loss": 0.7863, + "step": 9640 + }, + { + "epoch": 0.52, + "learning_rate": 4.643893876566323e-05, + "loss": 0.7283, + "step": 9645 + }, + { + "epoch": 0.52, + "learning_rate": 4.643533567571603e-05, + "loss": 0.8001, + "step": 9650 + }, + { + "epoch": 0.52, + "learning_rate": 4.6431730903807574e-05, + "loss": 0.9337, + "step": 9655 + }, + { + "epoch": 0.52, + "learning_rate": 4.642812445022073e-05, + "loss": 0.7278, + "step": 9660 + }, + { + "epoch": 0.52, + "learning_rate": 4.6424516315238476e-05, + "loss": 0.771, + "step": 9665 + }, + { + "epoch": 0.52, + "learning_rate": 4.642090649914393e-05, + "loss": 0.7458, + "step": 9670 + }, + { + "epoch": 0.52, + "learning_rate": 4.641729500222034e-05, + "loss": 0.8958, + "step": 9675 + }, + { + "epoch": 0.52, + "learning_rate": 4.64136818247511e-05, + "loss": 0.7685, + "step": 9680 + }, + { + "epoch": 0.52, + "learning_rate": 4.6410066967019716e-05, + "loss": 0.9039, + "step": 9685 + }, + { + "epoch": 0.52, + "learning_rate": 4.640645042930985e-05, + "loss": 0.7324, + "step": 9690 + }, + { + "epoch": 0.52, + "learning_rate": 4.6402832211905244e-05, + "loss": 0.9173, + "step": 9695 + }, + { + "epoch": 0.52, + "learning_rate": 4.639921231508983e-05, + "loss": 0.8128, + "step": 9700 + }, + { + "epoch": 0.52, + "learning_rate": 4.639559073914767e-05, + "loss": 0.7656, + "step": 9705 + }, + { + "epoch": 0.52, + "learning_rate": 4.63919674843629e-05, + "loss": 0.6259, + "step": 9710 + }, + { + "epoch": 0.52, + "learning_rate": 4.638834255101985e-05, + "loss": 0.7749, + "step": 9715 + }, + { + "epoch": 0.52, + "learning_rate": 4.638471593940293e-05, + "loss": 0.8371, + "step": 9720 + }, + { + "epoch": 0.52, + "learning_rate": 4.638108764979675e-05, + "loss": 0.8643, + "step": 9725 + }, + { + "epoch": 0.52, + "learning_rate": 4.637745768248597e-05, + "loss": 0.8237, + "step": 9730 + }, + { + "epoch": 0.52, + "learning_rate": 4.6373826037755454e-05, + "loss": 0.8107, + "step": 9735 + }, + { + "epoch": 0.52, + "learning_rate": 4.637019271589014e-05, + "loss": 0.8004, + "step": 9740 + }, + { + "epoch": 0.52, + "learning_rate": 4.636655771717513e-05, + "loss": 0.7253, + "step": 9745 + }, + { + "epoch": 0.52, + "learning_rate": 4.636292104189566e-05, + "loss": 0.8304, + "step": 9750 + }, + { + "epoch": 0.52, + "learning_rate": 4.635928269033708e-05, + "loss": 0.8822, + "step": 9755 + }, + { + "epoch": 0.52, + "learning_rate": 4.635564266278488e-05, + "loss": 0.8726, + "step": 9760 + }, + { + "epoch": 0.52, + "learning_rate": 4.635200095952468e-05, + "loss": 0.8136, + "step": 9765 + }, + { + "epoch": 0.52, + "learning_rate": 4.6348357580842246e-05, + "loss": 0.8866, + "step": 9770 + }, + { + "epoch": 0.52, + "learning_rate": 4.6344712527023435e-05, + "loss": 0.804, + "step": 9775 + }, + { + "epoch": 0.52, + "learning_rate": 4.634106579835429e-05, + "loss": 0.671, + "step": 9780 + }, + { + "epoch": 0.52, + "learning_rate": 4.633741739512095e-05, + "loss": 0.8072, + "step": 9785 + }, + { + "epoch": 0.52, + "learning_rate": 4.633376731760968e-05, + "loss": 0.6597, + "step": 9790 + }, + { + "epoch": 0.52, + "learning_rate": 4.63301155661069e-05, + "loss": 0.7634, + "step": 9795 + }, + { + "epoch": 0.52, + "learning_rate": 4.6326462140899154e-05, + "loss": 0.7513, + "step": 9800 + }, + { + "epoch": 0.52, + "learning_rate": 4.632280704227311e-05, + "loss": 0.8291, + "step": 9805 + }, + { + "epoch": 0.52, + "learning_rate": 4.6319150270515576e-05, + "loss": 0.7837, + "step": 9810 + }, + { + "epoch": 0.53, + "learning_rate": 4.6315491825913484e-05, + "loss": 0.8024, + "step": 9815 + }, + { + "epoch": 0.53, + "learning_rate": 4.63118317087539e-05, + "loss": 0.8076, + "step": 9820 + }, + { + "epoch": 0.53, + "learning_rate": 4.630816991932402e-05, + "loss": 0.6843, + "step": 9825 + }, + { + "epoch": 0.53, + "learning_rate": 4.630450645791118e-05, + "loss": 0.7388, + "step": 9830 + }, + { + "epoch": 0.53, + "learning_rate": 4.630084132480282e-05, + "loss": 0.7083, + "step": 9835 + }, + { + "epoch": 0.53, + "learning_rate": 4.629717452028656e-05, + "loss": 0.8184, + "step": 9840 + }, + { + "epoch": 0.53, + "learning_rate": 4.6293506044650105e-05, + "loss": 0.8329, + "step": 9845 + }, + { + "epoch": 0.53, + "learning_rate": 4.628983589818131e-05, + "loss": 0.7568, + "step": 9850 + }, + { + "epoch": 0.53, + "learning_rate": 4.628616408116816e-05, + "loss": 0.9127, + "step": 9855 + }, + { + "epoch": 0.53, + "learning_rate": 4.628249059389877e-05, + "loss": 0.82, + "step": 9860 + }, + { + "epoch": 0.53, + "learning_rate": 4.627881543666139e-05, + "loss": 0.7945, + "step": 9865 + }, + { + "epoch": 0.53, + "learning_rate": 4.6275138609744396e-05, + "loss": 0.7832, + "step": 9870 + }, + { + "epoch": 0.53, + "learning_rate": 4.6271460113436295e-05, + "loss": 0.8366, + "step": 9875 + }, + { + "epoch": 0.53, + "learning_rate": 4.626777994802572e-05, + "loss": 0.6796, + "step": 9880 + }, + { + "epoch": 0.53, + "learning_rate": 4.626409811380147e-05, + "loss": 0.8064, + "step": 9885 + }, + { + "epoch": 0.53, + "learning_rate": 4.626041461105241e-05, + "loss": 0.6502, + "step": 9890 + }, + { + "epoch": 0.53, + "learning_rate": 4.625672944006759e-05, + "loss": 0.7781, + "step": 9895 + }, + { + "epoch": 0.53, + "learning_rate": 4.625304260113617e-05, + "loss": 0.6633, + "step": 9900 + }, + { + "epoch": 0.53, + "learning_rate": 4.6249354094547456e-05, + "loss": 0.9537, + "step": 9905 + }, + { + "epoch": 0.53, + "learning_rate": 4.6245663920590856e-05, + "loss": 0.7638, + "step": 9910 + }, + { + "epoch": 0.53, + "learning_rate": 4.624197207955594e-05, + "loss": 0.6981, + "step": 9915 + }, + { + "epoch": 0.53, + "learning_rate": 4.6238278571732386e-05, + "loss": 0.7026, + "step": 9920 + }, + { + "epoch": 0.53, + "learning_rate": 4.6234583397410017e-05, + "loss": 0.6439, + "step": 9925 + }, + { + "epoch": 0.53, + "learning_rate": 4.6230886556878775e-05, + "loss": 0.8046, + "step": 9930 + }, + { + "epoch": 0.53, + "learning_rate": 4.6227188050428734e-05, + "loss": 0.6273, + "step": 9935 + }, + { + "epoch": 0.53, + "learning_rate": 4.622348787835012e-05, + "loss": 0.7367, + "step": 9940 + }, + { + "epoch": 0.53, + "learning_rate": 4.621978604093326e-05, + "loss": 0.7979, + "step": 9945 + }, + { + "epoch": 0.53, + "learning_rate": 4.621608253846864e-05, + "loss": 0.7118, + "step": 9950 + }, + { + "epoch": 0.53, + "learning_rate": 4.6212377371246845e-05, + "loss": 0.8271, + "step": 9955 + }, + { + "epoch": 0.53, + "learning_rate": 4.620867053955862e-05, + "loss": 0.8968, + "step": 9960 + }, + { + "epoch": 0.53, + "learning_rate": 4.620496204369482e-05, + "loss": 0.779, + "step": 9965 + }, + { + "epoch": 0.53, + "learning_rate": 4.620125188394644e-05, + "loss": 0.8184, + "step": 9970 + }, + { + "epoch": 0.53, + "learning_rate": 4.619754006060461e-05, + "loss": 0.8621, + "step": 9975 + }, + { + "epoch": 0.53, + "learning_rate": 4.619382657396057e-05, + "loss": 0.8337, + "step": 9980 + }, + { + "epoch": 0.53, + "learning_rate": 4.619011142430574e-05, + "loss": 0.8226, + "step": 9985 + }, + { + "epoch": 0.53, + "learning_rate": 4.618639461193159e-05, + "loss": 0.7623, + "step": 9990 + }, + { + "epoch": 0.53, + "learning_rate": 4.61826761371298e-05, + "loss": 0.7875, + "step": 9995 + }, + { + "epoch": 0.53, + "learning_rate": 4.6178956000192127e-05, + "loss": 0.7813, + "step": 10000 + }, + { + "epoch": 0.54, + "learning_rate": 4.617523420141049e-05, + "loss": 0.7784, + "step": 10005 + }, + { + "epoch": 0.54, + "learning_rate": 4.6171510741076916e-05, + "loss": 0.7646, + "step": 10010 + }, + { + "epoch": 0.54, + "learning_rate": 4.6167785619483585e-05, + "loss": 0.7866, + "step": 10015 + }, + { + "epoch": 0.54, + "learning_rate": 4.616405883692279e-05, + "loss": 0.7771, + "step": 10020 + }, + { + "epoch": 0.54, + "learning_rate": 4.616033039368695e-05, + "loss": 0.7228, + "step": 10025 + }, + { + "epoch": 0.54, + "learning_rate": 4.615660029006864e-05, + "loss": 0.8959, + "step": 10030 + }, + { + "epoch": 0.54, + "learning_rate": 4.615286852636054e-05, + "loss": 0.9012, + "step": 10035 + }, + { + "epoch": 0.54, + "learning_rate": 4.6149135102855466e-05, + "loss": 0.7588, + "step": 10040 + }, + { + "epoch": 0.54, + "learning_rate": 4.614540001984638e-05, + "loss": 0.592, + "step": 10045 + }, + { + "epoch": 0.54, + "learning_rate": 4.614166327762635e-05, + "loss": 0.7225, + "step": 10050 + }, + { + "epoch": 0.54, + "learning_rate": 4.6137924876488596e-05, + "loss": 0.7672, + "step": 10055 + }, + { + "epoch": 0.54, + "learning_rate": 4.6134184816726446e-05, + "loss": 0.8455, + "step": 10060 + }, + { + "epoch": 0.54, + "learning_rate": 4.613044309863338e-05, + "loss": 0.8741, + "step": 10065 + }, + { + "epoch": 0.54, + "learning_rate": 4.6126699722502994e-05, + "loss": 0.894, + "step": 10070 + }, + { + "epoch": 0.54, + "learning_rate": 4.612295468862903e-05, + "loss": 0.7773, + "step": 10075 + }, + { + "epoch": 0.54, + "learning_rate": 4.611920799730532e-05, + "loss": 0.7497, + "step": 10080 + }, + { + "epoch": 0.54, + "learning_rate": 4.611545964882589e-05, + "loss": 0.7893, + "step": 10085 + }, + { + "epoch": 0.54, + "learning_rate": 4.6111709643484844e-05, + "loss": 0.821, + "step": 10090 + }, + { + "epoch": 0.54, + "learning_rate": 4.610795798157642e-05, + "loss": 0.9373, + "step": 10095 + }, + { + "epoch": 0.54, + "learning_rate": 4.610420466339502e-05, + "loss": 0.8326, + "step": 10100 + }, + { + "epoch": 0.54, + "learning_rate": 4.6100449689235145e-05, + "loss": 0.7538, + "step": 10105 + }, + { + "epoch": 0.54, + "learning_rate": 4.6096693059391446e-05, + "loss": 0.656, + "step": 10110 + }, + { + "epoch": 0.54, + "learning_rate": 4.609293477415868e-05, + "loss": 0.7189, + "step": 10115 + }, + { + "epoch": 0.54, + "learning_rate": 4.6089174833831744e-05, + "loss": 0.9002, + "step": 10120 + }, + { + "epoch": 0.54, + "learning_rate": 4.608541323870568e-05, + "loss": 0.6934, + "step": 10125 + }, + { + "epoch": 0.54, + "learning_rate": 4.6081649989075646e-05, + "loss": 0.7381, + "step": 10130 + }, + { + "epoch": 0.54, + "learning_rate": 4.607788508523693e-05, + "loss": 0.7228, + "step": 10135 + }, + { + "epoch": 0.54, + "learning_rate": 4.607411852748495e-05, + "loss": 0.7072, + "step": 10140 + }, + { + "epoch": 0.54, + "learning_rate": 4.6070350316115266e-05, + "loss": 0.5865, + "step": 10145 + }, + { + "epoch": 0.54, + "learning_rate": 4.606658045142355e-05, + "loss": 0.8025, + "step": 10150 + }, + { + "epoch": 0.54, + "learning_rate": 4.6062808933705604e-05, + "loss": 0.8481, + "step": 10155 + }, + { + "epoch": 0.54, + "learning_rate": 4.605903576325737e-05, + "loss": 0.7504, + "step": 10160 + }, + { + "epoch": 0.54, + "learning_rate": 4.6055260940374924e-05, + "loss": 0.8906, + "step": 10165 + }, + { + "epoch": 0.54, + "learning_rate": 4.605148446535446e-05, + "loss": 0.7604, + "step": 10170 + }, + { + "epoch": 0.54, + "learning_rate": 4.60477063384923e-05, + "loss": 0.8846, + "step": 10175 + }, + { + "epoch": 0.54, + "learning_rate": 4.6043926560084916e-05, + "loss": 0.7143, + "step": 10180 + }, + { + "epoch": 0.54, + "learning_rate": 4.604014513042888e-05, + "loss": 0.7782, + "step": 10185 + }, + { + "epoch": 0.55, + "learning_rate": 4.6036362049820914e-05, + "loss": 0.7974, + "step": 10190 + }, + { + "epoch": 0.55, + "learning_rate": 4.603257731855787e-05, + "loss": 0.8214, + "step": 10195 + }, + { + "epoch": 0.55, + "learning_rate": 4.602879093693671e-05, + "loss": 0.7688, + "step": 10200 + }, + { + "epoch": 0.55, + "learning_rate": 4.602500290525455e-05, + "loss": 0.7307, + "step": 10205 + }, + { + "epoch": 0.55, + "learning_rate": 4.6021213223808624e-05, + "loss": 0.7982, + "step": 10210 + }, + { + "epoch": 0.55, + "learning_rate": 4.6017421892896287e-05, + "loss": 0.9078, + "step": 10215 + }, + { + "epoch": 0.55, + "learning_rate": 4.601362891281504e-05, + "loss": 0.9685, + "step": 10220 + }, + { + "epoch": 0.55, + "learning_rate": 4.600983428386251e-05, + "loss": 0.7428, + "step": 10225 + }, + { + "epoch": 0.55, + "learning_rate": 4.600603800633645e-05, + "loss": 0.8188, + "step": 10230 + }, + { + "epoch": 0.55, + "learning_rate": 4.600224008053472e-05, + "loss": 0.6804, + "step": 10235 + }, + { + "epoch": 0.55, + "learning_rate": 4.5998440506755356e-05, + "loss": 0.8504, + "step": 10240 + }, + { + "epoch": 0.55, + "learning_rate": 4.599463928529649e-05, + "loss": 0.9281, + "step": 10245 + }, + { + "epoch": 0.55, + "learning_rate": 4.5990836416456385e-05, + "loss": 0.7945, + "step": 10250 + }, + { + "epoch": 0.55, + "learning_rate": 4.598703190053344e-05, + "loss": 0.6668, + "step": 10255 + }, + { + "epoch": 0.55, + "learning_rate": 4.59832257378262e-05, + "loss": 0.8056, + "step": 10260 + }, + { + "epoch": 0.55, + "learning_rate": 4.5979417928633305e-05, + "loss": 0.7964, + "step": 10265 + }, + { + "epoch": 0.55, + "learning_rate": 4.597560847325355e-05, + "loss": 1.035, + "step": 10270 + }, + { + "epoch": 0.55, + "learning_rate": 4.597179737198584e-05, + "loss": 0.8906, + "step": 10275 + }, + { + "epoch": 0.55, + "learning_rate": 4.5967984625129235e-05, + "loss": 0.7478, + "step": 10280 + }, + { + "epoch": 0.55, + "learning_rate": 4.59641702329829e-05, + "loss": 0.8341, + "step": 10285 + }, + { + "epoch": 0.55, + "learning_rate": 4.5960354195846136e-05, + "loss": 0.7146, + "step": 10290 + }, + { + "epoch": 0.55, + "learning_rate": 4.595653651401838e-05, + "loss": 0.7813, + "step": 10295 + }, + { + "epoch": 0.55, + "learning_rate": 4.595271718779919e-05, + "loss": 0.7597, + "step": 10300 + }, + { + "epoch": 0.55, + "learning_rate": 4.594889621748825e-05, + "loss": 0.8173, + "step": 10305 + }, + { + "epoch": 0.55, + "learning_rate": 4.594507360338539e-05, + "loss": 0.6584, + "step": 10310 + }, + { + "epoch": 0.55, + "learning_rate": 4.594124934579056e-05, + "loss": 0.7892, + "step": 10315 + }, + { + "epoch": 0.55, + "learning_rate": 4.593742344500384e-05, + "loss": 0.7498, + "step": 10320 + }, + { + "epoch": 0.55, + "learning_rate": 4.593359590132541e-05, + "loss": 0.7586, + "step": 10325 + }, + { + "epoch": 0.55, + "learning_rate": 4.592976671505563e-05, + "loss": 0.7721, + "step": 10330 + }, + { + "epoch": 0.55, + "learning_rate": 4.5925935886494955e-05, + "loss": 0.7091, + "step": 10335 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922103415943986e-05, + "loss": 0.8232, + "step": 10340 + }, + { + "epoch": 0.55, + "learning_rate": 4.5918269303703425e-05, + "loss": 0.7667, + "step": 10345 + }, + { + "epoch": 0.55, + "learning_rate": 4.5914433550074145e-05, + "loss": 0.8969, + "step": 10350 + }, + { + "epoch": 0.55, + "learning_rate": 4.591059615535711e-05, + "loss": 0.8925, + "step": 10355 + }, + { + "epoch": 0.55, + "learning_rate": 4.5906757119853435e-05, + "loss": 0.7632, + "step": 10360 + }, + { + "epoch": 0.55, + "learning_rate": 4.5902916443864354e-05, + "loss": 0.8219, + "step": 10365 + }, + { + "epoch": 0.55, + "learning_rate": 4.589907412769123e-05, + "loss": 0.9253, + "step": 10370 + }, + { + "epoch": 0.56, + "learning_rate": 4.589523017163557e-05, + "loss": 0.7863, + "step": 10375 + }, + { + "epoch": 0.56, + "learning_rate": 4.5891384575998974e-05, + "loss": 0.8144, + "step": 10380 + }, + { + "epoch": 0.56, + "learning_rate": 4.588753734108321e-05, + "loss": 0.6786, + "step": 10385 + }, + { + "epoch": 0.56, + "learning_rate": 4.588368846719016e-05, + "loss": 0.7238, + "step": 10390 + }, + { + "epoch": 0.56, + "learning_rate": 4.587983795462183e-05, + "loss": 0.7105, + "step": 10395 + }, + { + "epoch": 0.56, + "learning_rate": 4.587598580368034e-05, + "loss": 0.8256, + "step": 10400 + }, + { + "epoch": 0.56, + "learning_rate": 4.587213201466798e-05, + "loss": 0.738, + "step": 10405 + }, + { + "epoch": 0.56, + "learning_rate": 4.586827658788714e-05, + "loss": 0.6176, + "step": 10410 + }, + { + "epoch": 0.56, + "learning_rate": 4.586441952364032e-05, + "loss": 0.759, + "step": 10415 + }, + { + "epoch": 0.56, + "learning_rate": 4.5860560822230206e-05, + "loss": 0.795, + "step": 10420 + }, + { + "epoch": 0.56, + "learning_rate": 4.585670048395955e-05, + "loss": 0.7823, + "step": 10425 + }, + { + "epoch": 0.56, + "learning_rate": 4.585283850913128e-05, + "loss": 0.8893, + "step": 10430 + }, + { + "epoch": 0.56, + "learning_rate": 4.584897489804841e-05, + "loss": 0.7336, + "step": 10435 + }, + { + "epoch": 0.56, + "learning_rate": 4.584510965101413e-05, + "loss": 0.7736, + "step": 10440 + }, + { + "epoch": 0.56, + "learning_rate": 4.5841242768331713e-05, + "loss": 0.8292, + "step": 10445 + }, + { + "epoch": 0.56, + "learning_rate": 4.583737425030459e-05, + "loss": 0.8093, + "step": 10450 + }, + { + "epoch": 0.56, + "learning_rate": 4.583350409723631e-05, + "loss": 0.8556, + "step": 10455 + }, + { + "epoch": 0.56, + "learning_rate": 4.582963230943056e-05, + "loss": 0.7383, + "step": 10460 + }, + { + "epoch": 0.56, + "learning_rate": 4.582575888719113e-05, + "loss": 0.8949, + "step": 10465 + }, + { + "epoch": 0.56, + "learning_rate": 4.5821883830821966e-05, + "loss": 0.8084, + "step": 10470 + }, + { + "epoch": 0.56, + "learning_rate": 4.581800714062713e-05, + "loss": 0.7045, + "step": 10475 + }, + { + "epoch": 0.56, + "learning_rate": 4.5814128816910805e-05, + "loss": 0.7557, + "step": 10480 + }, + { + "epoch": 0.56, + "learning_rate": 4.581024885997732e-05, + "loss": 0.859, + "step": 10485 + }, + { + "epoch": 0.56, + "learning_rate": 4.5806367270131125e-05, + "loss": 0.7317, + "step": 10490 + }, + { + "epoch": 0.56, + "learning_rate": 4.5802484047676784e-05, + "loss": 0.8322, + "step": 10495 + }, + { + "epoch": 0.56, + "learning_rate": 4.5798599192919014e-05, + "loss": 0.6434, + "step": 10500 + }, + { + "epoch": 0.56, + "learning_rate": 4.579471270616264e-05, + "loss": 0.8798, + "step": 10505 + }, + { + "epoch": 0.56, + "learning_rate": 4.579082458771261e-05, + "loss": 0.731, + "step": 10510 + }, + { + "epoch": 0.56, + "learning_rate": 4.578693483787404e-05, + "loss": 0.7318, + "step": 10515 + }, + { + "epoch": 0.56, + "learning_rate": 4.5783043456952126e-05, + "loss": 0.8649, + "step": 10520 + }, + { + "epoch": 0.56, + "learning_rate": 4.577915044525221e-05, + "loss": 0.7678, + "step": 10525 + }, + { + "epoch": 0.56, + "learning_rate": 4.5775255803079776e-05, + "loss": 0.6588, + "step": 10530 + }, + { + "epoch": 0.56, + "learning_rate": 4.577135953074042e-05, + "loss": 0.764, + "step": 10535 + }, + { + "epoch": 0.56, + "learning_rate": 4.576746162853986e-05, + "loss": 0.9417, + "step": 10540 + }, + { + "epoch": 0.56, + "learning_rate": 4.576356209678396e-05, + "loss": 0.7035, + "step": 10545 + }, + { + "epoch": 0.56, + "learning_rate": 4.5759660935778716e-05, + "loss": 0.7982, + "step": 10550 + }, + { + "epoch": 0.56, + "learning_rate": 4.575575814583022e-05, + "loss": 0.6423, + "step": 10555 + }, + { + "epoch": 0.56, + "learning_rate": 4.575185372724472e-05, + "loss": 0.7089, + "step": 10560 + }, + { + "epoch": 0.57, + "learning_rate": 4.5747947680328574e-05, + "loss": 0.6855, + "step": 10565 + }, + { + "epoch": 0.57, + "learning_rate": 4.574404000538829e-05, + "loss": 0.784, + "step": 10570 + }, + { + "epoch": 0.57, + "learning_rate": 4.574013070273049e-05, + "loss": 0.7017, + "step": 10575 + }, + { + "epoch": 0.57, + "learning_rate": 4.5736219772661906e-05, + "loss": 0.7412, + "step": 10580 + }, + { + "epoch": 0.57, + "learning_rate": 4.573230721548944e-05, + "loss": 0.8399, + "step": 10585 + }, + { + "epoch": 0.57, + "learning_rate": 4.572839303152008e-05, + "loss": 0.75, + "step": 10590 + }, + { + "epoch": 0.57, + "learning_rate": 4.5724477221060965e-05, + "loss": 0.8845, + "step": 10595 + }, + { + "epoch": 0.57, + "learning_rate": 4.572055978441937e-05, + "loss": 0.7167, + "step": 10600 + }, + { + "epoch": 0.57, + "learning_rate": 4.571664072190266e-05, + "loss": 0.8206, + "step": 10605 + }, + { + "epoch": 0.57, + "learning_rate": 4.571272003381836e-05, + "loss": 0.7914, + "step": 10610 + }, + { + "epoch": 0.57, + "learning_rate": 4.570879772047412e-05, + "loss": 0.9396, + "step": 10615 + }, + { + "epoch": 0.57, + "learning_rate": 4.5704873782177704e-05, + "loss": 0.8118, + "step": 10620 + }, + { + "epoch": 0.57, + "learning_rate": 4.5700948219237015e-05, + "loss": 0.7251, + "step": 10625 + }, + { + "epoch": 0.57, + "learning_rate": 4.569702103196008e-05, + "loss": 0.7845, + "step": 10630 + }, + { + "epoch": 0.57, + "learning_rate": 4.569309222065505e-05, + "loss": 0.68, + "step": 10635 + }, + { + "epoch": 0.57, + "learning_rate": 4.56891617856302e-05, + "loss": 0.7675, + "step": 10640 + }, + { + "epoch": 0.57, + "learning_rate": 4.568522972719395e-05, + "loss": 0.663, + "step": 10645 + }, + { + "epoch": 0.57, + "learning_rate": 4.568129604565483e-05, + "loss": 0.7192, + "step": 10650 + }, + { + "epoch": 0.57, + "learning_rate": 4.5677360741321495e-05, + "loss": 0.7118, + "step": 10655 + }, + { + "epoch": 0.57, + "learning_rate": 4.5673423814502754e-05, + "loss": 0.7883, + "step": 10660 + }, + { + "epoch": 0.57, + "learning_rate": 4.566948526550751e-05, + "loss": 0.7901, + "step": 10665 + }, + { + "epoch": 0.57, + "learning_rate": 4.566554509464482e-05, + "loss": 0.7413, + "step": 10670 + }, + { + "epoch": 0.57, + "learning_rate": 4.566160330222384e-05, + "loss": 0.8076, + "step": 10675 + }, + { + "epoch": 0.57, + "learning_rate": 4.565765988855389e-05, + "loss": 0.7808, + "step": 10680 + }, + { + "epoch": 0.57, + "learning_rate": 4.565371485394438e-05, + "loss": 0.7337, + "step": 10685 + }, + { + "epoch": 0.57, + "learning_rate": 4.5649768198704867e-05, + "loss": 0.768, + "step": 10690 + }, + { + "epoch": 0.57, + "learning_rate": 4.564581992314504e-05, + "loss": 0.6409, + "step": 10695 + }, + { + "epoch": 0.57, + "learning_rate": 4.564187002757471e-05, + "loss": 0.9355, + "step": 10700 + }, + { + "epoch": 0.57, + "learning_rate": 4.563791851230379e-05, + "loss": 0.9525, + "step": 10705 + }, + { + "epoch": 0.57, + "learning_rate": 4.563396537764238e-05, + "loss": 0.7921, + "step": 10710 + }, + { + "epoch": 0.57, + "learning_rate": 4.563001062390062e-05, + "loss": 0.6906, + "step": 10715 + }, + { + "epoch": 0.57, + "learning_rate": 4.562605425138887e-05, + "loss": 0.8456, + "step": 10720 + }, + { + "epoch": 0.57, + "learning_rate": 4.5622096260417556e-05, + "loss": 0.7792, + "step": 10725 + }, + { + "epoch": 0.57, + "learning_rate": 4.5618136651297255e-05, + "loss": 0.6354, + "step": 10730 + }, + { + "epoch": 0.57, + "learning_rate": 4.561417542433866e-05, + "loss": 0.7007, + "step": 10735 + }, + { + "epoch": 0.57, + "learning_rate": 4.561021257985259e-05, + "loss": 0.8787, + "step": 10740 + }, + { + "epoch": 0.57, + "learning_rate": 4.5606248118150005e-05, + "loss": 0.7196, + "step": 10745 + }, + { + "epoch": 0.58, + "learning_rate": 4.5602282039541984e-05, + "loss": 0.938, + "step": 10750 + }, + { + "epoch": 0.58, + "learning_rate": 4.559831434433973e-05, + "loss": 0.8162, + "step": 10755 + }, + { + "epoch": 0.58, + "learning_rate": 4.5594345032854566e-05, + "loss": 0.8315, + "step": 10760 + }, + { + "epoch": 0.58, + "learning_rate": 4.559037410539797e-05, + "loss": 0.9728, + "step": 10765 + }, + { + "epoch": 0.58, + "learning_rate": 4.558640156228151e-05, + "loss": 0.9368, + "step": 10770 + }, + { + "epoch": 0.58, + "learning_rate": 4.5582427403816906e-05, + "loss": 0.7606, + "step": 10775 + }, + { + "epoch": 0.58, + "learning_rate": 4.557845163031601e-05, + "loss": 0.7466, + "step": 10780 + }, + { + "epoch": 0.58, + "learning_rate": 4.557447424209076e-05, + "loss": 0.9268, + "step": 10785 + }, + { + "epoch": 0.58, + "learning_rate": 4.557049523945327e-05, + "loss": 0.8981, + "step": 10790 + }, + { + "epoch": 0.58, + "learning_rate": 4.556651462271575e-05, + "loss": 0.98, + "step": 10795 + }, + { + "epoch": 0.58, + "learning_rate": 4.5562532392190556e-05, + "loss": 0.8729, + "step": 10800 + }, + { + "epoch": 0.58, + "learning_rate": 4.555854854819015e-05, + "loss": 0.6925, + "step": 10805 + }, + { + "epoch": 0.58, + "learning_rate": 4.555456309102714e-05, + "loss": 0.8089, + "step": 10810 + }, + { + "epoch": 0.58, + "learning_rate": 4.555057602101424e-05, + "loss": 0.6679, + "step": 10815 + }, + { + "epoch": 0.58, + "learning_rate": 4.5546587338464316e-05, + "loss": 0.7568, + "step": 10820 + }, + { + "epoch": 0.58, + "learning_rate": 4.554259704369034e-05, + "loss": 0.8799, + "step": 10825 + }, + { + "epoch": 0.58, + "learning_rate": 4.5538605137005416e-05, + "loss": 0.827, + "step": 10830 + }, + { + "epoch": 0.58, + "learning_rate": 4.553461161872278e-05, + "loss": 0.7771, + "step": 10835 + }, + { + "epoch": 0.58, + "learning_rate": 4.5530616489155785e-05, + "loss": 0.6974, + "step": 10840 + }, + { + "epoch": 0.58, + "learning_rate": 4.552661974861793e-05, + "loss": 0.8431, + "step": 10845 + }, + { + "epoch": 0.58, + "learning_rate": 4.5522621397422805e-05, + "loss": 0.818, + "step": 10850 + }, + { + "epoch": 0.58, + "learning_rate": 4.551862143588416e-05, + "loss": 0.8314, + "step": 10855 + }, + { + "epoch": 0.58, + "learning_rate": 4.551461986431587e-05, + "loss": 0.7711, + "step": 10860 + }, + { + "epoch": 0.58, + "learning_rate": 4.551061668303189e-05, + "loss": 0.7723, + "step": 10865 + }, + { + "epoch": 0.58, + "learning_rate": 4.550661189234638e-05, + "loss": 0.897, + "step": 10870 + }, + { + "epoch": 0.58, + "learning_rate": 4.550260549257356e-05, + "loss": 0.7719, + "step": 10875 + }, + { + "epoch": 0.58, + "learning_rate": 4.549859748402779e-05, + "loss": 0.6446, + "step": 10880 + }, + { + "epoch": 0.58, + "learning_rate": 4.549458786702358e-05, + "loss": 0.91, + "step": 10885 + }, + { + "epoch": 0.58, + "learning_rate": 4.549057664187556e-05, + "loss": 0.7295, + "step": 10890 + }, + { + "epoch": 0.58, + "learning_rate": 4.5486563808898465e-05, + "loss": 0.7739, + "step": 10895 + }, + { + "epoch": 0.58, + "learning_rate": 4.548254936840716e-05, + "loss": 0.8616, + "step": 10900 + }, + { + "epoch": 0.58, + "learning_rate": 4.5478533320716664e-05, + "loss": 0.6855, + "step": 10905 + }, + { + "epoch": 0.58, + "learning_rate": 4.547451566614209e-05, + "loss": 0.7904, + "step": 10910 + }, + { + "epoch": 0.58, + "learning_rate": 4.5470496404998694e-05, + "loss": 0.7656, + "step": 10915 + }, + { + "epoch": 0.58, + "learning_rate": 4.546647553760186e-05, + "loss": 0.6199, + "step": 10920 + }, + { + "epoch": 0.58, + "learning_rate": 4.5462453064267085e-05, + "loss": 0.7672, + "step": 10925 + }, + { + "epoch": 0.58, + "learning_rate": 4.545842898531001e-05, + "loss": 0.9704, + "step": 10930 + }, + { + "epoch": 0.58, + "learning_rate": 4.5454403301046376e-05, + "loss": 0.8178, + "step": 10935 + }, + { + "epoch": 0.59, + "learning_rate": 4.5450376011792076e-05, + "loss": 0.7388, + "step": 10940 + }, + { + "epoch": 0.59, + "learning_rate": 4.544634711786312e-05, + "loss": 0.8091, + "step": 10945 + }, + { + "epoch": 0.59, + "learning_rate": 4.544231661957563e-05, + "loss": 0.8149, + "step": 10950 + }, + { + "epoch": 0.59, + "learning_rate": 4.543828451724588e-05, + "loss": 0.6827, + "step": 10955 + }, + { + "epoch": 0.59, + "learning_rate": 4.543425081119024e-05, + "loss": 0.7941, + "step": 10960 + }, + { + "epoch": 0.59, + "learning_rate": 4.543021550172524e-05, + "loss": 0.8306, + "step": 10965 + }, + { + "epoch": 0.59, + "learning_rate": 4.542617858916751e-05, + "loss": 0.9001, + "step": 10970 + }, + { + "epoch": 0.59, + "learning_rate": 4.542214007383381e-05, + "loss": 0.6837, + "step": 10975 + }, + { + "epoch": 0.59, + "learning_rate": 4.541809995604104e-05, + "loss": 0.758, + "step": 10980 + }, + { + "epoch": 0.59, + "learning_rate": 4.541405823610619e-05, + "loss": 0.8964, + "step": 10985 + }, + { + "epoch": 0.59, + "learning_rate": 4.541001491434643e-05, + "loss": 0.8149, + "step": 10990 + }, + { + "epoch": 0.59, + "learning_rate": 4.540596999107901e-05, + "loss": 0.5443, + "step": 10995 + }, + { + "epoch": 0.59, + "learning_rate": 4.540192346662133e-05, + "loss": 0.7648, + "step": 11000 + }, + { + "epoch": 0.59, + "learning_rate": 4.5397875341290906e-05, + "loss": 0.7131, + "step": 11005 + }, + { + "epoch": 0.59, + "learning_rate": 4.539382561540537e-05, + "loss": 0.714, + "step": 11010 + }, + { + "epoch": 0.59, + "learning_rate": 4.5389774289282506e-05, + "loss": 0.6926, + "step": 11015 + }, + { + "epoch": 0.59, + "learning_rate": 4.5385721363240205e-05, + "loss": 0.7779, + "step": 11020 + }, + { + "epoch": 0.59, + "learning_rate": 4.538166683759648e-05, + "loss": 0.7415, + "step": 11025 + }, + { + "epoch": 0.59, + "learning_rate": 4.537761071266948e-05, + "loss": 0.7751, + "step": 11030 + }, + { + "epoch": 0.59, + "learning_rate": 4.537355298877747e-05, + "loss": 0.8838, + "step": 11035 + }, + { + "epoch": 0.59, + "learning_rate": 4.536949366623887e-05, + "loss": 0.723, + "step": 11040 + }, + { + "epoch": 0.59, + "learning_rate": 4.5365432745372173e-05, + "loss": 1.0116, + "step": 11045 + }, + { + "epoch": 0.59, + "learning_rate": 4.5361370226496034e-05, + "loss": 0.7479, + "step": 11050 + }, + { + "epoch": 0.59, + "learning_rate": 4.535730610992924e-05, + "loss": 0.7617, + "step": 11055 + }, + { + "epoch": 0.59, + "learning_rate": 4.535324039599067e-05, + "loss": 0.8237, + "step": 11060 + }, + { + "epoch": 0.59, + "learning_rate": 4.5349173084999366e-05, + "loss": 0.6832, + "step": 11065 + }, + { + "epoch": 0.59, + "learning_rate": 4.534510417727445e-05, + "loss": 0.7419, + "step": 11070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5341033673135235e-05, + "loss": 0.7406, + "step": 11075 + }, + { + "epoch": 0.59, + "learning_rate": 4.5336961572901084e-05, + "loss": 0.704, + "step": 11080 + }, + { + "epoch": 0.59, + "learning_rate": 4.533288787689154e-05, + "loss": 0.854, + "step": 11085 + }, + { + "epoch": 0.59, + "learning_rate": 4.532881258542625e-05, + "loss": 0.8356, + "step": 11090 + }, + { + "epoch": 0.59, + "learning_rate": 4.532473569882498e-05, + "loss": 0.7699, + "step": 11095 + }, + { + "epoch": 0.59, + "learning_rate": 4.5320657217407645e-05, + "loss": 0.7127, + "step": 11100 + }, + { + "epoch": 0.59, + "learning_rate": 4.531657714149427e-05, + "loss": 0.7333, + "step": 11105 + }, + { + "epoch": 0.59, + "learning_rate": 4.531249547140498e-05, + "loss": 0.8154, + "step": 11110 + }, + { + "epoch": 0.59, + "learning_rate": 4.530841220746008e-05, + "loss": 0.7927, + "step": 11115 + }, + { + "epoch": 0.59, + "learning_rate": 4.530432734997996e-05, + "loss": 0.6829, + "step": 11120 + }, + { + "epoch": 0.6, + "learning_rate": 4.530024089928513e-05, + "loss": 0.6931, + "step": 11125 + }, + { + "epoch": 0.6, + "learning_rate": 4.5296152855696275e-05, + "loss": 0.6714, + "step": 11130 + }, + { + "epoch": 0.6, + "learning_rate": 4.529206321953413e-05, + "loss": 0.8873, + "step": 11135 + }, + { + "epoch": 0.6, + "learning_rate": 4.5287971991119626e-05, + "loss": 0.7947, + "step": 11140 + }, + { + "epoch": 0.6, + "learning_rate": 4.528387917077378e-05, + "loss": 0.7713, + "step": 11145 + }, + { + "epoch": 0.6, + "learning_rate": 4.527978475881774e-05, + "loss": 0.851, + "step": 11150 + }, + { + "epoch": 0.6, + "learning_rate": 4.527568875557278e-05, + "loss": 0.7505, + "step": 11155 + }, + { + "epoch": 0.6, + "learning_rate": 4.52715911613603e-05, + "loss": 0.7439, + "step": 11160 + }, + { + "epoch": 0.6, + "learning_rate": 4.5267491976501834e-05, + "loss": 0.7772, + "step": 11165 + }, + { + "epoch": 0.6, + "learning_rate": 4.5263391201319016e-05, + "loss": 0.7568, + "step": 11170 + }, + { + "epoch": 0.6, + "learning_rate": 4.5259288836133635e-05, + "loss": 0.9697, + "step": 11175 + }, + { + "epoch": 0.6, + "learning_rate": 4.525518488126758e-05, + "loss": 0.8261, + "step": 11180 + }, + { + "epoch": 0.6, + "learning_rate": 4.525107933704289e-05, + "loss": 0.7863, + "step": 11185 + }, + { + "epoch": 0.6, + "learning_rate": 4.524697220378169e-05, + "loss": 0.735, + "step": 11190 + }, + { + "epoch": 0.6, + "learning_rate": 4.524286348180627e-05, + "loss": 0.8028, + "step": 11195 + }, + { + "epoch": 0.6, + "learning_rate": 4.5238753171439024e-05, + "loss": 0.8267, + "step": 11200 + }, + { + "epoch": 0.6, + "learning_rate": 4.5234641273002474e-05, + "loss": 0.6984, + "step": 11205 + }, + { + "epoch": 0.6, + "learning_rate": 4.523052778681928e-05, + "loss": 0.8856, + "step": 11210 + }, + { + "epoch": 0.6, + "learning_rate": 4.522641271321219e-05, + "loss": 0.9242, + "step": 11215 + }, + { + "epoch": 0.6, + "learning_rate": 4.522229605250412e-05, + "loss": 0.6925, + "step": 11220 + }, + { + "epoch": 0.6, + "learning_rate": 4.521817780501808e-05, + "loss": 0.738, + "step": 11225 + }, + { + "epoch": 0.6, + "learning_rate": 4.5214057971077216e-05, + "loss": 0.837, + "step": 11230 + }, + { + "epoch": 0.6, + "learning_rate": 4.520993655100481e-05, + "loss": 0.8385, + "step": 11235 + }, + { + "epoch": 0.6, + "learning_rate": 4.5205813545124244e-05, + "loss": 0.7381, + "step": 11240 + }, + { + "epoch": 0.6, + "learning_rate": 4.520168895375905e-05, + "loss": 0.8823, + "step": 11245 + }, + { + "epoch": 0.6, + "learning_rate": 4.519756277723285e-05, + "loss": 0.6183, + "step": 11250 + }, + { + "epoch": 0.6, + "learning_rate": 4.519343501586943e-05, + "loss": 0.7952, + "step": 11255 + }, + { + "epoch": 0.6, + "learning_rate": 4.518930566999267e-05, + "loss": 0.6961, + "step": 11260 + }, + { + "epoch": 0.6, + "learning_rate": 4.51851747399266e-05, + "loss": 0.8361, + "step": 11265 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181042225995344e-05, + "loss": 0.6481, + "step": 11270 + }, + { + "epoch": 0.6, + "learning_rate": 4.517690812852319e-05, + "loss": 0.8791, + "step": 11275 + }, + { + "epoch": 0.6, + "learning_rate": 4.51727724478345e-05, + "loss": 0.8789, + "step": 11280 + }, + { + "epoch": 0.6, + "learning_rate": 4.5168635184253805e-05, + "loss": 0.8676, + "step": 11285 + }, + { + "epoch": 0.6, + "learning_rate": 4.5165324233925155e-05, + "loss": 0.8545, + "step": 11290 + }, + { + "epoch": 0.6, + "learning_rate": 4.516118412195701e-05, + "loss": 0.6939, + "step": 11295 + }, + { + "epoch": 0.6, + "learning_rate": 4.5157042428006165e-05, + "loss": 0.79, + "step": 11300 + }, + { + "epoch": 0.6, + "learning_rate": 4.515289915239759e-05, + "loss": 0.7704, + "step": 11305 + }, + { + "epoch": 0.61, + "learning_rate": 4.514875429545639e-05, + "loss": 0.8389, + "step": 11310 + }, + { + "epoch": 0.61, + "learning_rate": 4.514460785750782e-05, + "loss": 0.9743, + "step": 11315 + }, + { + "epoch": 0.61, + "learning_rate": 4.514045983887721e-05, + "loss": 0.6996, + "step": 11320 + }, + { + "epoch": 0.61, + "learning_rate": 4.513631023989007e-05, + "loss": 0.905, + "step": 11325 + }, + { + "epoch": 0.61, + "learning_rate": 4.5132159060871985e-05, + "loss": 0.6121, + "step": 11330 + }, + { + "epoch": 0.61, + "learning_rate": 4.512800630214869e-05, + "loss": 0.7722, + "step": 11335 + }, + { + "epoch": 0.61, + "learning_rate": 4.5123851964046045e-05, + "loss": 0.7694, + "step": 11340 + }, + { + "epoch": 0.61, + "learning_rate": 4.511969604689001e-05, + "loss": 0.8299, + "step": 11345 + }, + { + "epoch": 0.61, + "learning_rate": 4.511553855100671e-05, + "loss": 0.8293, + "step": 11350 + }, + { + "epoch": 0.61, + "learning_rate": 4.511137947672236e-05, + "loss": 0.662, + "step": 11355 + }, + { + "epoch": 0.61, + "learning_rate": 4.5107218824363306e-05, + "loss": 0.88, + "step": 11360 + }, + { + "epoch": 0.61, + "learning_rate": 4.510305659425602e-05, + "loss": 0.9379, + "step": 11365 + }, + { + "epoch": 0.61, + "learning_rate": 4.509889278672711e-05, + "loss": 0.7759, + "step": 11370 + }, + { + "epoch": 0.61, + "learning_rate": 4.509472740210328e-05, + "loss": 0.8812, + "step": 11375 + }, + { + "epoch": 0.61, + "learning_rate": 4.509056044071138e-05, + "loss": 0.8354, + "step": 11380 + }, + { + "epoch": 0.61, + "learning_rate": 4.508639190287839e-05, + "loss": 0.8905, + "step": 11385 + }, + { + "epoch": 0.61, + "learning_rate": 4.5082221788931384e-05, + "loss": 0.7021, + "step": 11390 + }, + { + "epoch": 0.61, + "learning_rate": 4.507805009919759e-05, + "loss": 0.7593, + "step": 11395 + }, + { + "epoch": 0.61, + "learning_rate": 4.507387683400434e-05, + "loss": 0.8714, + "step": 11400 + }, + { + "epoch": 0.61, + "learning_rate": 4.506970199367909e-05, + "loss": 0.671, + "step": 11405 + }, + { + "epoch": 0.61, + "learning_rate": 4.506552557854945e-05, + "loss": 0.6942, + "step": 11410 + }, + { + "epoch": 0.61, + "learning_rate": 4.50613475889431e-05, + "loss": 0.6638, + "step": 11415 + }, + { + "epoch": 0.61, + "learning_rate": 4.50571680251879e-05, + "loss": 0.7896, + "step": 11420 + }, + { + "epoch": 0.61, + "learning_rate": 4.505298688761179e-05, + "loss": 0.7905, + "step": 11425 + }, + { + "epoch": 0.61, + "learning_rate": 4.5048804176542855e-05, + "loss": 0.8022, + "step": 11430 + }, + { + "epoch": 0.61, + "learning_rate": 4.5044619892309295e-05, + "loss": 0.7792, + "step": 11435 + }, + { + "epoch": 0.61, + "learning_rate": 4.504043403523944e-05, + "loss": 0.6195, + "step": 11440 + }, + { + "epoch": 0.61, + "learning_rate": 4.5036246605661754e-05, + "loss": 0.7328, + "step": 11445 + }, + { + "epoch": 0.61, + "learning_rate": 4.5032057603904785e-05, + "loss": 0.757, + "step": 11450 + }, + { + "epoch": 0.61, + "learning_rate": 4.5027867030297257e-05, + "loss": 0.7619, + "step": 11455 + }, + { + "epoch": 0.61, + "learning_rate": 4.5023674885167966e-05, + "loss": 0.7739, + "step": 11460 + }, + { + "epoch": 0.61, + "learning_rate": 4.501948116884587e-05, + "loss": 0.8146, + "step": 11465 + }, + { + "epoch": 0.61, + "learning_rate": 4.501528588166004e-05, + "loss": 0.7016, + "step": 11470 + }, + { + "epoch": 0.61, + "learning_rate": 4.5011089023939655e-05, + "loss": 0.8032, + "step": 11475 + }, + { + "epoch": 0.61, + "learning_rate": 4.500689059601403e-05, + "loss": 0.7545, + "step": 11480 + }, + { + "epoch": 0.61, + "learning_rate": 4.5002690598212616e-05, + "loss": 0.8386, + "step": 11485 + }, + { + "epoch": 0.61, + "learning_rate": 4.499848903086497e-05, + "loss": 0.7125, + "step": 11490 + }, + { + "epoch": 0.61, + "learning_rate": 4.499428589430075e-05, + "loss": 0.7925, + "step": 11495 + }, + { + "epoch": 0.62, + "learning_rate": 4.499008118884979e-05, + "loss": 0.7435, + "step": 11500 + }, + { + "epoch": 0.62, + "learning_rate": 4.4985874914842015e-05, + "loss": 0.8457, + "step": 11505 + }, + { + "epoch": 0.62, + "learning_rate": 4.498166707260747e-05, + "loss": 0.8124, + "step": 11510 + }, + { + "epoch": 0.62, + "learning_rate": 4.497745766247634e-05, + "loss": 0.8022, + "step": 11515 + }, + { + "epoch": 0.62, + "learning_rate": 4.4973246684778905e-05, + "loss": 0.7935, + "step": 11520 + }, + { + "epoch": 0.62, + "learning_rate": 4.496903413984561e-05, + "loss": 0.7948, + "step": 11525 + }, + { + "epoch": 0.62, + "learning_rate": 4.496482002800699e-05, + "loss": 0.7123, + "step": 11530 + }, + { + "epoch": 0.62, + "learning_rate": 4.496060434959371e-05, + "loss": 0.6831, + "step": 11535 + }, + { + "epoch": 0.62, + "learning_rate": 4.495638710493656e-05, + "loss": 0.5735, + "step": 11540 + }, + { + "epoch": 0.62, + "learning_rate": 4.495216829436646e-05, + "loss": 0.6868, + "step": 11545 + }, + { + "epoch": 0.62, + "learning_rate": 4.4947947918214444e-05, + "loss": 0.8139, + "step": 11550 + }, + { + "epoch": 0.62, + "learning_rate": 4.4943725976811666e-05, + "loss": 0.7321, + "step": 11555 + }, + { + "epoch": 0.62, + "learning_rate": 4.493950247048942e-05, + "loss": 0.6833, + "step": 11560 + }, + { + "epoch": 0.62, + "learning_rate": 4.49361225389123e-05, + "loss": 0.7848, + "step": 11565 + }, + { + "epoch": 0.62, + "learning_rate": 4.4931896216570216e-05, + "loss": 0.733, + "step": 11570 + }, + { + "epoch": 0.62, + "learning_rate": 4.4927668330236895e-05, + "loss": 0.7849, + "step": 11575 + }, + { + "epoch": 0.62, + "learning_rate": 4.4923438880244094e-05, + "loss": 0.8142, + "step": 11580 + }, + { + "epoch": 0.62, + "learning_rate": 4.4919207866923674e-05, + "loss": 0.8539, + "step": 11585 + }, + { + "epoch": 0.62, + "learning_rate": 4.4914975290607644e-05, + "loss": 0.8426, + "step": 11590 + }, + { + "epoch": 0.62, + "learning_rate": 4.49107411516281e-05, + "loss": 0.8092, + "step": 11595 + }, + { + "epoch": 0.62, + "learning_rate": 4.49065054503173e-05, + "loss": 0.8492, + "step": 11600 + }, + { + "epoch": 0.62, + "learning_rate": 4.49022681870076e-05, + "loss": 0.7114, + "step": 11605 + }, + { + "epoch": 0.62, + "learning_rate": 4.4898029362031486e-05, + "loss": 0.7255, + "step": 11610 + }, + { + "epoch": 0.62, + "learning_rate": 4.489378897572155e-05, + "loss": 0.6828, + "step": 11615 + }, + { + "epoch": 0.62, + "learning_rate": 4.488954702841054e-05, + "loss": 0.8522, + "step": 11620 + }, + { + "epoch": 0.62, + "learning_rate": 4.48853035204313e-05, + "loss": 0.7798, + "step": 11625 + }, + { + "epoch": 0.62, + "learning_rate": 4.4881058452116803e-05, + "loss": 0.6972, + "step": 11630 + }, + { + "epoch": 0.62, + "learning_rate": 4.487681182380015e-05, + "loss": 0.7316, + "step": 11635 + }, + { + "epoch": 0.62, + "learning_rate": 4.4872563635814555e-05, + "loss": 0.7366, + "step": 11640 + }, + { + "epoch": 0.62, + "learning_rate": 4.486831388849336e-05, + "loss": 0.7457, + "step": 11645 + }, + { + "epoch": 0.62, + "learning_rate": 4.486406258217003e-05, + "loss": 0.8564, + "step": 11650 + }, + { + "epoch": 0.62, + "learning_rate": 4.485980971717816e-05, + "loss": 0.7643, + "step": 11655 + }, + { + "epoch": 0.62, + "learning_rate": 4.4855555293851445e-05, + "loss": 0.7106, + "step": 11660 + }, + { + "epoch": 0.62, + "learning_rate": 4.485129931252373e-05, + "loss": 0.7829, + "step": 11665 + }, + { + "epoch": 0.62, + "learning_rate": 4.484704177352895e-05, + "loss": 0.9266, + "step": 11670 + }, + { + "epoch": 0.62, + "learning_rate": 4.484278267720119e-05, + "loss": 0.9619, + "step": 11675 + }, + { + "epoch": 0.62, + "learning_rate": 4.4838522023874655e-05, + "loss": 0.9012, + "step": 11680 + }, + { + "epoch": 0.63, + "learning_rate": 4.483425981388365e-05, + "loss": 0.8963, + "step": 11685 + }, + { + "epoch": 0.63, + "learning_rate": 4.4829996047562626e-05, + "loss": 0.7888, + "step": 11690 + }, + { + "epoch": 0.63, + "learning_rate": 4.482573072524615e-05, + "loss": 0.8111, + "step": 11695 + }, + { + "epoch": 0.63, + "learning_rate": 4.482146384726889e-05, + "loss": 0.804, + "step": 11700 + }, + { + "epoch": 0.63, + "learning_rate": 4.4817195413965684e-05, + "loss": 0.7884, + "step": 11705 + }, + { + "epoch": 0.63, + "learning_rate": 4.4812925425671435e-05, + "loss": 0.8359, + "step": 11710 + }, + { + "epoch": 0.63, + "learning_rate": 4.4808653882721205e-05, + "loss": 0.8497, + "step": 11715 + }, + { + "epoch": 0.63, + "learning_rate": 4.480438078545017e-05, + "loss": 0.7625, + "step": 11720 + }, + { + "epoch": 0.63, + "learning_rate": 4.480010613419363e-05, + "loss": 0.8469, + "step": 11725 + }, + { + "epoch": 0.63, + "learning_rate": 4.4795829929286983e-05, + "loss": 0.6867, + "step": 11730 + }, + { + "epoch": 0.63, + "learning_rate": 4.4791552171065793e-05, + "loss": 0.7706, + "step": 11735 + }, + { + "epoch": 0.63, + "learning_rate": 4.478727285986571e-05, + "loss": 0.7705, + "step": 11740 + }, + { + "epoch": 0.63, + "learning_rate": 4.4782991996022516e-05, + "loss": 0.7344, + "step": 11745 + }, + { + "epoch": 0.63, + "learning_rate": 4.4778709579872125e-05, + "loss": 0.649, + "step": 11750 + }, + { + "epoch": 0.63, + "learning_rate": 4.477442561175056e-05, + "loss": 0.7686, + "step": 11755 + }, + { + "epoch": 0.63, + "learning_rate": 4.4770140091993975e-05, + "loss": 0.7635, + "step": 11760 + }, + { + "epoch": 0.63, + "learning_rate": 4.4765853020938616e-05, + "loss": 0.8724, + "step": 11765 + }, + { + "epoch": 0.63, + "learning_rate": 4.476156439892092e-05, + "loss": 0.7394, + "step": 11770 + }, + { + "epoch": 0.63, + "learning_rate": 4.475727422627736e-05, + "loss": 0.878, + "step": 11775 + }, + { + "epoch": 0.63, + "learning_rate": 4.475298250334459e-05, + "loss": 0.7403, + "step": 11780 + }, + { + "epoch": 0.63, + "learning_rate": 4.474868923045937e-05, + "loss": 0.6866, + "step": 11785 + }, + { + "epoch": 0.63, + "learning_rate": 4.474439440795857e-05, + "loss": 0.6778, + "step": 11790 + }, + { + "epoch": 0.63, + "learning_rate": 4.47400980361792e-05, + "loss": 0.8495, + "step": 11795 + }, + { + "epoch": 0.63, + "learning_rate": 4.4735800115458376e-05, + "loss": 0.8818, + "step": 11800 + }, + { + "epoch": 0.63, + "learning_rate": 4.4731500646133344e-05, + "loss": 0.7179, + "step": 11805 + }, + { + "epoch": 0.63, + "learning_rate": 4.4727199628541474e-05, + "loss": 0.7991, + "step": 11810 + }, + { + "epoch": 0.63, + "learning_rate": 4.4722897063020244e-05, + "loss": 0.7647, + "step": 11815 + }, + { + "epoch": 0.63, + "learning_rate": 4.471859294990727e-05, + "loss": 0.7148, + "step": 11820 + }, + { + "epoch": 0.63, + "learning_rate": 4.471428728954027e-05, + "loss": 0.7418, + "step": 11825 + }, + { + "epoch": 0.63, + "learning_rate": 4.470998008225711e-05, + "loss": 0.7474, + "step": 11830 + }, + { + "epoch": 0.63, + "learning_rate": 4.470567132839575e-05, + "loss": 0.7828, + "step": 11835 + }, + { + "epoch": 0.63, + "learning_rate": 4.47013610282943e-05, + "loss": 0.8215, + "step": 11840 + }, + { + "epoch": 0.63, + "learning_rate": 4.469704918229096e-05, + "loss": 0.8326, + "step": 11845 + }, + { + "epoch": 0.63, + "learning_rate": 4.469273579072407e-05, + "loss": 0.8397, + "step": 11850 + }, + { + "epoch": 0.63, + "learning_rate": 4.46884208539321e-05, + "loss": 0.6773, + "step": 11855 + }, + { + "epoch": 0.63, + "learning_rate": 4.468410437225361e-05, + "loss": 0.822, + "step": 11860 + }, + { + "epoch": 0.63, + "learning_rate": 4.4679786346027305e-05, + "loss": 0.7635, + "step": 11865 + }, + { + "epoch": 0.64, + "learning_rate": 4.4675466775592016e-05, + "loss": 0.7476, + "step": 11870 + }, + { + "epoch": 0.64, + "learning_rate": 4.467114566128668e-05, + "loss": 0.7371, + "step": 11875 + }, + { + "epoch": 0.64, + "learning_rate": 4.4666823003450365e-05, + "loss": 0.8943, + "step": 11880 + }, + { + "epoch": 0.64, + "learning_rate": 4.4662498802422245e-05, + "loss": 0.7084, + "step": 11885 + }, + { + "epoch": 0.64, + "learning_rate": 4.4658173058541644e-05, + "loss": 0.9649, + "step": 11890 + }, + { + "epoch": 0.64, + "learning_rate": 4.4653845772147976e-05, + "loss": 0.758, + "step": 11895 + }, + { + "epoch": 0.64, + "learning_rate": 4.464951694358078e-05, + "loss": 0.737, + "step": 11900 + }, + { + "epoch": 0.64, + "learning_rate": 4.464518657317974e-05, + "loss": 0.9439, + "step": 11905 + }, + { + "epoch": 0.64, + "learning_rate": 4.464085466128465e-05, + "loss": 0.8029, + "step": 11910 + }, + { + "epoch": 0.64, + "learning_rate": 4.463652120823541e-05, + "loss": 0.8127, + "step": 11915 + }, + { + "epoch": 0.64, + "learning_rate": 4.4632186214372056e-05, + "loss": 0.7715, + "step": 11920 + }, + { + "epoch": 0.64, + "learning_rate": 4.462784968003474e-05, + "loss": 0.8689, + "step": 11925 + }, + { + "epoch": 0.64, + "learning_rate": 4.4623511605563736e-05, + "loss": 0.7674, + "step": 11930 + }, + { + "epoch": 0.64, + "learning_rate": 4.461917199129944e-05, + "loss": 0.8544, + "step": 11935 + }, + { + "epoch": 0.64, + "learning_rate": 4.4614830837582364e-05, + "loss": 0.8383, + "step": 11940 + }, + { + "epoch": 0.64, + "learning_rate": 4.4610488144753157e-05, + "loss": 0.8173, + "step": 11945 + }, + { + "epoch": 0.64, + "learning_rate": 4.460614391315255e-05, + "loss": 0.6548, + "step": 11950 + }, + { + "epoch": 0.64, + "learning_rate": 4.460179814312145e-05, + "loss": 0.7145, + "step": 11955 + }, + { + "epoch": 0.64, + "learning_rate": 4.4597450835000835e-05, + "loss": 0.6893, + "step": 11960 + }, + { + "epoch": 0.64, + "learning_rate": 4.459310198913183e-05, + "loss": 0.6765, + "step": 11965 + }, + { + "epoch": 0.64, + "learning_rate": 4.4588751605855686e-05, + "loss": 0.8187, + "step": 11970 + }, + { + "epoch": 0.64, + "learning_rate": 4.458439968551374e-05, + "loss": 0.7609, + "step": 11975 + }, + { + "epoch": 0.64, + "learning_rate": 4.45800462284475e-05, + "loss": 0.8555, + "step": 11980 + }, + { + "epoch": 0.64, + "learning_rate": 4.457569123499854e-05, + "loss": 0.8014, + "step": 11985 + }, + { + "epoch": 0.64, + "learning_rate": 4.4571334705508607e-05, + "loss": 0.7674, + "step": 11990 + }, + { + "epoch": 0.64, + "learning_rate": 4.4566976640319527e-05, + "loss": 0.6711, + "step": 11995 + }, + { + "epoch": 0.64, + "learning_rate": 4.456261703977327e-05, + "loss": 0.6891, + "step": 12000 + }, + { + "epoch": 0.64, + "learning_rate": 4.455825590421192e-05, + "loss": 0.7042, + "step": 12005 + }, + { + "epoch": 0.64, + "learning_rate": 4.455389323397768e-05, + "loss": 0.8806, + "step": 12010 + }, + { + "epoch": 0.64, + "learning_rate": 4.4549529029412884e-05, + "loss": 0.8583, + "step": 12015 + }, + { + "epoch": 0.64, + "learning_rate": 4.454516329085996e-05, + "loss": 0.8913, + "step": 12020 + }, + { + "epoch": 0.64, + "learning_rate": 4.454079601866148e-05, + "loss": 0.6946, + "step": 12025 + }, + { + "epoch": 0.64, + "learning_rate": 4.4536427213160134e-05, + "loss": 0.8268, + "step": 12030 + }, + { + "epoch": 0.64, + "learning_rate": 4.453205687469872e-05, + "loss": 0.8099, + "step": 12035 + }, + { + "epoch": 0.64, + "learning_rate": 4.452768500362017e-05, + "loss": 0.7899, + "step": 12040 + }, + { + "epoch": 0.64, + "learning_rate": 4.4523311600267535e-05, + "loss": 0.805, + "step": 12045 + }, + { + "epoch": 0.64, + "learning_rate": 4.451893666498397e-05, + "loss": 0.6887, + "step": 12050 + }, + { + "epoch": 0.64, + "learning_rate": 4.451456019811277e-05, + "loss": 0.6805, + "step": 12055 + }, + { + "epoch": 0.65, + "learning_rate": 4.4510182199997344e-05, + "loss": 0.7977, + "step": 12060 + }, + { + "epoch": 0.65, + "learning_rate": 4.450580267098121e-05, + "loss": 0.8631, + "step": 12065 + }, + { + "epoch": 0.65, + "learning_rate": 4.4501421611408024e-05, + "loss": 0.707, + "step": 12070 + }, + { + "epoch": 0.65, + "learning_rate": 4.449703902162156e-05, + "loss": 0.6694, + "step": 12075 + }, + { + "epoch": 0.65, + "learning_rate": 4.449265490196568e-05, + "loss": 0.7346, + "step": 12080 + }, + { + "epoch": 0.65, + "learning_rate": 4.448826925278442e-05, + "loss": 0.7558, + "step": 12085 + }, + { + "epoch": 0.65, + "learning_rate": 4.448388207442189e-05, + "loss": 0.6895, + "step": 12090 + }, + { + "epoch": 0.65, + "learning_rate": 4.4479493367222346e-05, + "loss": 0.8271, + "step": 12095 + }, + { + "epoch": 0.65, + "learning_rate": 4.447510313153015e-05, + "loss": 0.6803, + "step": 12100 + }, + { + "epoch": 0.65, + "learning_rate": 4.44707113676898e-05, + "loss": 0.7462, + "step": 12105 + }, + { + "epoch": 0.65, + "learning_rate": 4.446631807604589e-05, + "loss": 0.6744, + "step": 12110 + }, + { + "epoch": 0.65, + "learning_rate": 4.4461923256943147e-05, + "loss": 0.6925, + "step": 12115 + }, + { + "epoch": 0.65, + "learning_rate": 4.4457526910726434e-05, + "loss": 0.7863, + "step": 12120 + }, + { + "epoch": 0.65, + "learning_rate": 4.445312903774071e-05, + "loss": 0.8851, + "step": 12125 + }, + { + "epoch": 0.65, + "learning_rate": 4.4448729638331056e-05, + "loss": 0.8196, + "step": 12130 + }, + { + "epoch": 0.65, + "learning_rate": 4.444432871284269e-05, + "loss": 0.7028, + "step": 12135 + }, + { + "epoch": 0.65, + "learning_rate": 4.443992626162092e-05, + "loss": 0.7868, + "step": 12140 + }, + { + "epoch": 0.65, + "learning_rate": 4.443552228501121e-05, + "loss": 0.8979, + "step": 12145 + }, + { + "epoch": 0.65, + "learning_rate": 4.443111678335911e-05, + "loss": 0.7723, + "step": 12150 + }, + { + "epoch": 0.65, + "learning_rate": 4.4426709757010324e-05, + "loss": 0.8113, + "step": 12155 + }, + { + "epoch": 0.65, + "learning_rate": 4.442230120631065e-05, + "loss": 0.6194, + "step": 12160 + }, + { + "epoch": 0.65, + "learning_rate": 4.4417891131606005e-05, + "loss": 0.7462, + "step": 12165 + }, + { + "epoch": 0.65, + "learning_rate": 4.441347953324244e-05, + "loss": 0.8843, + "step": 12170 + }, + { + "epoch": 0.65, + "learning_rate": 4.4409066411566114e-05, + "loss": 0.7431, + "step": 12175 + }, + { + "epoch": 0.65, + "learning_rate": 4.440465176692332e-05, + "loss": 0.7888, + "step": 12180 + }, + { + "epoch": 0.65, + "learning_rate": 4.440023559966045e-05, + "loss": 0.8514, + "step": 12185 + }, + { + "epoch": 0.65, + "learning_rate": 4.439581791012403e-05, + "loss": 0.7558, + "step": 12190 + }, + { + "epoch": 0.65, + "learning_rate": 4.439139869866071e-05, + "loss": 0.7756, + "step": 12195 + }, + { + "epoch": 0.65, + "learning_rate": 4.438697796561724e-05, + "loss": 0.7798, + "step": 12200 + }, + { + "epoch": 0.65, + "learning_rate": 4.43825557113405e-05, + "loss": 0.6846, + "step": 12205 + }, + { + "epoch": 0.65, + "learning_rate": 4.43781319361775e-05, + "loss": 0.6656, + "step": 12210 + }, + { + "epoch": 0.65, + "learning_rate": 4.437370664047535e-05, + "loss": 0.6589, + "step": 12215 + }, + { + "epoch": 0.65, + "learning_rate": 4.436927982458129e-05, + "loss": 0.8864, + "step": 12220 + }, + { + "epoch": 0.65, + "learning_rate": 4.436485148884268e-05, + "loss": 0.9256, + "step": 12225 + }, + { + "epoch": 0.65, + "learning_rate": 4.4360421633607005e-05, + "loss": 0.8187, + "step": 12230 + }, + { + "epoch": 0.65, + "learning_rate": 4.435599025922185e-05, + "loss": 0.6863, + "step": 12235 + }, + { + "epoch": 0.65, + "learning_rate": 4.4351557366034934e-05, + "loss": 0.6832, + "step": 12240 + }, + { + "epoch": 0.66, + "learning_rate": 4.43471229543941e-05, + "loss": 0.6344, + "step": 12245 + }, + { + "epoch": 0.66, + "learning_rate": 4.434268702464728e-05, + "loss": 0.8072, + "step": 12250 + }, + { + "epoch": 0.66, + "learning_rate": 4.4338249577142564e-05, + "loss": 0.6251, + "step": 12255 + }, + { + "epoch": 0.66, + "learning_rate": 4.433381061222814e-05, + "loss": 0.8505, + "step": 12260 + }, + { + "epoch": 0.66, + "learning_rate": 4.432937013025232e-05, + "loss": 0.8873, + "step": 12265 + }, + { + "epoch": 0.66, + "learning_rate": 4.4324928131563546e-05, + "loss": 0.818, + "step": 12270 + }, + { + "epoch": 0.66, + "learning_rate": 4.432048461651034e-05, + "loss": 0.6063, + "step": 12275 + }, + { + "epoch": 0.66, + "learning_rate": 4.43160395854414e-05, + "loss": 0.7536, + "step": 12280 + }, + { + "epoch": 0.66, + "learning_rate": 4.431159303870549e-05, + "loss": 0.8557, + "step": 12285 + }, + { + "epoch": 0.66, + "learning_rate": 4.430714497665153e-05, + "loss": 0.7247, + "step": 12290 + }, + { + "epoch": 0.66, + "learning_rate": 4.430269539962854e-05, + "loss": 0.8816, + "step": 12295 + }, + { + "epoch": 0.66, + "learning_rate": 4.429824430798566e-05, + "loss": 0.8217, + "step": 12300 + }, + { + "epoch": 0.66, + "learning_rate": 4.429379170207215e-05, + "loss": 0.6246, + "step": 12305 + }, + { + "epoch": 0.66, + "learning_rate": 4.428933758223741e-05, + "loss": 0.9075, + "step": 12310 + }, + { + "epoch": 0.66, + "learning_rate": 4.428488194883093e-05, + "loss": 0.7784, + "step": 12315 + }, + { + "epoch": 0.66, + "learning_rate": 4.4280424802202315e-05, + "loss": 0.7788, + "step": 12320 + }, + { + "epoch": 0.66, + "learning_rate": 4.427596614270133e-05, + "loss": 0.8232, + "step": 12325 + }, + { + "epoch": 0.66, + "learning_rate": 4.427150597067781e-05, + "loss": 0.8237, + "step": 12330 + }, + { + "epoch": 0.66, + "learning_rate": 4.426704428648174e-05, + "loss": 0.8094, + "step": 12335 + }, + { + "epoch": 0.66, + "learning_rate": 4.426258109046321e-05, + "loss": 0.7078, + "step": 12340 + }, + { + "epoch": 0.66, + "learning_rate": 4.425811638297243e-05, + "loss": 0.8493, + "step": 12345 + }, + { + "epoch": 0.66, + "learning_rate": 4.4253650164359726e-05, + "loss": 0.813, + "step": 12350 + }, + { + "epoch": 0.66, + "learning_rate": 4.424918243497557e-05, + "loss": 0.8007, + "step": 12355 + }, + { + "epoch": 0.66, + "learning_rate": 4.4244713195170515e-05, + "loss": 0.8579, + "step": 12360 + }, + { + "epoch": 0.66, + "learning_rate": 4.424024244529524e-05, + "loss": 0.9052, + "step": 12365 + }, + { + "epoch": 0.66, + "learning_rate": 4.4235770185700575e-05, + "loss": 0.7603, + "step": 12370 + }, + { + "epoch": 0.66, + "learning_rate": 4.4231296416737425e-05, + "loss": 0.7722, + "step": 12375 + }, + { + "epoch": 0.66, + "learning_rate": 4.422682113875683e-05, + "loss": 0.8097, + "step": 12380 + }, + { + "epoch": 0.66, + "learning_rate": 4.4222344352109955e-05, + "loss": 0.7279, + "step": 12385 + }, + { + "epoch": 0.66, + "learning_rate": 4.421786605714808e-05, + "loss": 0.7405, + "step": 12390 + }, + { + "epoch": 0.66, + "learning_rate": 4.421338625422261e-05, + "loss": 0.6884, + "step": 12395 + }, + { + "epoch": 0.66, + "learning_rate": 4.4208904943685045e-05, + "loss": 0.854, + "step": 12400 + }, + { + "epoch": 0.66, + "learning_rate": 4.420442212588703e-05, + "loss": 0.74, + "step": 12405 + }, + { + "epoch": 0.66, + "learning_rate": 4.4199937801180314e-05, + "loss": 0.672, + "step": 12410 + }, + { + "epoch": 0.66, + "learning_rate": 4.419545196991677e-05, + "loss": 0.8228, + "step": 12415 + }, + { + "epoch": 0.66, + "learning_rate": 4.4190964632448384e-05, + "loss": 0.8464, + "step": 12420 + }, + { + "epoch": 0.66, + "learning_rate": 4.418647578912726e-05, + "loss": 0.8099, + "step": 12425 + }, + { + "epoch": 0.66, + "learning_rate": 4.4181985440305626e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.67, + "learning_rate": 4.417749358633582e-05, + "loss": 0.6858, + "step": 12435 + }, + { + "epoch": 0.67, + "learning_rate": 4.4173000227570315e-05, + "loss": 0.7632, + "step": 12440 + }, + { + "epoch": 0.67, + "learning_rate": 4.416850536436168e-05, + "loss": 0.8343, + "step": 12445 + }, + { + "epoch": 0.67, + "learning_rate": 4.416400899706261e-05, + "loss": 0.8169, + "step": 12450 + }, + { + "epoch": 0.67, + "learning_rate": 4.415951112602593e-05, + "loss": 0.7808, + "step": 12455 + }, + { + "epoch": 0.67, + "learning_rate": 4.415501175160458e-05, + "loss": 0.759, + "step": 12460 + }, + { + "epoch": 0.67, + "learning_rate": 4.415051087415159e-05, + "loss": 0.8992, + "step": 12465 + }, + { + "epoch": 0.67, + "learning_rate": 4.4146008494020144e-05, + "loss": 0.9015, + "step": 12470 + }, + { + "epoch": 0.67, + "learning_rate": 4.414150461156352e-05, + "loss": 0.7866, + "step": 12475 + }, + { + "epoch": 0.67, + "learning_rate": 4.4136999227135136e-05, + "loss": 0.7988, + "step": 12480 + }, + { + "epoch": 0.67, + "learning_rate": 4.41324923410885e-05, + "loss": 0.8147, + "step": 12485 + }, + { + "epoch": 0.67, + "learning_rate": 4.412798395377726e-05, + "loss": 0.8618, + "step": 12490 + }, + { + "epoch": 0.67, + "learning_rate": 4.412347406555518e-05, + "loss": 0.7601, + "step": 12495 + }, + { + "epoch": 0.67, + "learning_rate": 4.411896267677612e-05, + "loss": 0.7719, + "step": 12500 + }, + { + "epoch": 0.67, + "learning_rate": 4.41144497877941e-05, + "loss": 0.7976, + "step": 12505 + }, + { + "epoch": 0.67, + "learning_rate": 4.410993539896321e-05, + "loss": 0.7467, + "step": 12510 + }, + { + "epoch": 0.67, + "learning_rate": 4.410541951063768e-05, + "loss": 0.6516, + "step": 12515 + }, + { + "epoch": 0.67, + "learning_rate": 4.410090212317187e-05, + "loss": 0.8148, + "step": 12520 + }, + { + "epoch": 0.67, + "learning_rate": 4.409638323692024e-05, + "loss": 0.9414, + "step": 12525 + }, + { + "epoch": 0.67, + "learning_rate": 4.4091862852237355e-05, + "loss": 0.9168, + "step": 12530 + }, + { + "epoch": 0.67, + "learning_rate": 4.408734096947794e-05, + "loss": 0.7589, + "step": 12535 + }, + { + "epoch": 0.67, + "learning_rate": 4.4082817588996805e-05, + "loss": 0.85, + "step": 12540 + }, + { + "epoch": 0.67, + "learning_rate": 4.407829271114888e-05, + "loss": 0.7882, + "step": 12545 + }, + { + "epoch": 0.67, + "learning_rate": 4.4073766336289216e-05, + "loss": 0.9139, + "step": 12550 + }, + { + "epoch": 0.67, + "learning_rate": 4.4069238464772994e-05, + "loss": 0.7504, + "step": 12555 + }, + { + "epoch": 0.67, + "learning_rate": 4.4065615090206036e-05, + "loss": 0.8008, + "step": 12560 + }, + { + "epoch": 0.67, + "learning_rate": 4.406108452560339e-05, + "loss": 0.7145, + "step": 12565 + }, + { + "epoch": 0.67, + "learning_rate": 4.405655246533929e-05, + "loss": 0.8822, + "step": 12570 + }, + { + "epoch": 0.67, + "learning_rate": 4.405201890976934e-05, + "loss": 0.8377, + "step": 12575 + }, + { + "epoch": 0.67, + "learning_rate": 4.404748385924927e-05, + "loss": 0.8367, + "step": 12580 + }, + { + "epoch": 0.67, + "learning_rate": 4.4042947314134935e-05, + "loss": 0.7154, + "step": 12585 + }, + { + "epoch": 0.67, + "learning_rate": 4.4038409274782306e-05, + "loss": 0.9397, + "step": 12590 + }, + { + "epoch": 0.67, + "learning_rate": 4.403386974154747e-05, + "loss": 0.9481, + "step": 12595 + }, + { + "epoch": 0.67, + "learning_rate": 4.4029328714786613e-05, + "loss": 1.0109, + "step": 12600 + }, + { + "epoch": 0.67, + "learning_rate": 4.402478619485608e-05, + "loss": 0.7807, + "step": 12605 + }, + { + "epoch": 0.67, + "learning_rate": 4.402024218211229e-05, + "loss": 0.9127, + "step": 12610 + }, + { + "epoch": 0.67, + "learning_rate": 4.40156966769118e-05, + "loss": 0.8309, + "step": 12615 + }, + { + "epoch": 0.68, + "learning_rate": 4.4011149679611285e-05, + "loss": 0.7362, + "step": 12620 + }, + { + "epoch": 0.68, + "learning_rate": 4.400660119056753e-05, + "loss": 0.8508, + "step": 12625 + }, + { + "epoch": 0.68, + "learning_rate": 4.400205121013744e-05, + "loss": 0.8496, + "step": 12630 + }, + { + "epoch": 0.68, + "learning_rate": 4.399749973867804e-05, + "loss": 0.8917, + "step": 12635 + }, + { + "epoch": 0.68, + "learning_rate": 4.399294677654646e-05, + "loss": 0.7266, + "step": 12640 + }, + { + "epoch": 0.68, + "learning_rate": 4.398839232409997e-05, + "loss": 0.8649, + "step": 12645 + }, + { + "epoch": 0.68, + "learning_rate": 4.398383638169594e-05, + "loss": 0.9445, + "step": 12650 + }, + { + "epoch": 0.68, + "learning_rate": 4.397927894969185e-05, + "loss": 0.8074, + "step": 12655 + }, + { + "epoch": 0.68, + "learning_rate": 4.397472002844532e-05, + "loss": 0.6829, + "step": 12660 + }, + { + "epoch": 0.68, + "learning_rate": 4.3970159618314064e-05, + "loss": 0.8344, + "step": 12665 + }, + { + "epoch": 0.68, + "learning_rate": 4.396559771965592e-05, + "loss": 0.8884, + "step": 12670 + }, + { + "epoch": 0.68, + "learning_rate": 4.396103433282885e-05, + "loss": 0.8908, + "step": 12675 + }, + { + "epoch": 0.68, + "learning_rate": 4.395646945819094e-05, + "loss": 0.7523, + "step": 12680 + }, + { + "epoch": 0.68, + "learning_rate": 4.395190309610037e-05, + "loss": 0.7315, + "step": 12685 + }, + { + "epoch": 0.68, + "learning_rate": 4.3947335246915445e-05, + "loss": 0.6234, + "step": 12690 + }, + { + "epoch": 0.68, + "learning_rate": 4.3942765910994594e-05, + "loss": 0.7772, + "step": 12695 + }, + { + "epoch": 0.68, + "learning_rate": 4.3938195088696356e-05, + "loss": 0.8661, + "step": 12700 + }, + { + "epoch": 0.68, + "learning_rate": 4.393362278037938e-05, + "loss": 0.7324, + "step": 12705 + }, + { + "epoch": 0.68, + "learning_rate": 4.392904898640246e-05, + "loss": 0.8745, + "step": 12710 + }, + { + "epoch": 0.68, + "learning_rate": 4.392447370712447e-05, + "loss": 0.7445, + "step": 12715 + }, + { + "epoch": 0.68, + "learning_rate": 4.391989694290443e-05, + "loss": 0.8657, + "step": 12720 + }, + { + "epoch": 0.68, + "learning_rate": 4.3915318694101445e-05, + "loss": 0.8338, + "step": 12725 + }, + { + "epoch": 0.68, + "learning_rate": 4.391073896107477e-05, + "loss": 0.7449, + "step": 12730 + }, + { + "epoch": 0.68, + "learning_rate": 4.3906157744183766e-05, + "loss": 0.7508, + "step": 12735 + }, + { + "epoch": 0.68, + "learning_rate": 4.39015750437879e-05, + "loss": 0.786, + "step": 12740 + }, + { + "epoch": 0.68, + "learning_rate": 4.389699086024676e-05, + "loss": 0.6942, + "step": 12745 + }, + { + "epoch": 0.68, + "learning_rate": 4.389240519392005e-05, + "loss": 1.0015, + "step": 12750 + }, + { + "epoch": 0.68, + "learning_rate": 4.38878180451676e-05, + "loss": 0.7224, + "step": 12755 + }, + { + "epoch": 0.68, + "learning_rate": 4.3883229414349334e-05, + "loss": 0.7491, + "step": 12760 + }, + { + "epoch": 0.68, + "learning_rate": 4.387863930182532e-05, + "loss": 0.7287, + "step": 12765 + }, + { + "epoch": 0.68, + "learning_rate": 4.387404770795573e-05, + "loss": 0.7377, + "step": 12770 + }, + { + "epoch": 0.68, + "learning_rate": 4.386945463310085e-05, + "loss": 0.8481, + "step": 12775 + }, + { + "epoch": 0.68, + "learning_rate": 4.3864860077621074e-05, + "loss": 0.7454, + "step": 12780 + }, + { + "epoch": 0.68, + "learning_rate": 4.386026404187693e-05, + "loss": 0.7473, + "step": 12785 + }, + { + "epoch": 0.68, + "learning_rate": 4.385566652622906e-05, + "loss": 0.6627, + "step": 12790 + }, + { + "epoch": 0.68, + "learning_rate": 4.3851067531038206e-05, + "loss": 0.8933, + "step": 12795 + }, + { + "epoch": 0.68, + "learning_rate": 4.384646705666524e-05, + "loss": 0.8745, + "step": 12800 + }, + { + "epoch": 0.69, + "learning_rate": 4.384186510347114e-05, + "loss": 0.6821, + "step": 12805 + }, + { + "epoch": 0.69, + "learning_rate": 4.383726167181702e-05, + "loss": 0.7889, + "step": 12810 + }, + { + "epoch": 0.69, + "learning_rate": 4.383265676206408e-05, + "loss": 0.8617, + "step": 12815 + }, + { + "epoch": 0.69, + "learning_rate": 4.382805037457367e-05, + "loss": 0.8249, + "step": 12820 + }, + { + "epoch": 0.69, + "learning_rate": 4.3823442509707225e-05, + "loss": 0.7794, + "step": 12825 + }, + { + "epoch": 0.69, + "learning_rate": 4.3818833167826316e-05, + "loss": 0.9807, + "step": 12830 + }, + { + "epoch": 0.69, + "learning_rate": 4.381422234929262e-05, + "loss": 0.7749, + "step": 12835 + }, + { + "epoch": 0.69, + "learning_rate": 4.3809610054467934e-05, + "loss": 0.8389, + "step": 12840 + }, + { + "epoch": 0.69, + "learning_rate": 4.380499628371417e-05, + "loss": 0.7548, + "step": 12845 + }, + { + "epoch": 0.69, + "learning_rate": 4.380038103739335e-05, + "loss": 0.7417, + "step": 12850 + }, + { + "epoch": 0.69, + "learning_rate": 4.3795764315867625e-05, + "loss": 0.7841, + "step": 12855 + }, + { + "epoch": 0.69, + "learning_rate": 4.3791146119499246e-05, + "loss": 0.8969, + "step": 12860 + }, + { + "epoch": 0.69, + "learning_rate": 4.3786526448650614e-05, + "loss": 0.7581, + "step": 12865 + }, + { + "epoch": 0.69, + "learning_rate": 4.378190530368418e-05, + "loss": 0.8292, + "step": 12870 + }, + { + "epoch": 0.69, + "learning_rate": 4.377728268496257e-05, + "loss": 0.7353, + "step": 12875 + }, + { + "epoch": 0.69, + "learning_rate": 4.377265859284851e-05, + "loss": 0.7211, + "step": 12880 + }, + { + "epoch": 0.69, + "learning_rate": 4.376803302770483e-05, + "loss": 0.7554, + "step": 12885 + }, + { + "epoch": 0.69, + "learning_rate": 4.376340598989448e-05, + "loss": 0.815, + "step": 12890 + }, + { + "epoch": 0.69, + "learning_rate": 4.3758777479780545e-05, + "loss": 0.8335, + "step": 12895 + }, + { + "epoch": 0.69, + "learning_rate": 4.375414749772619e-05, + "loss": 0.7592, + "step": 12900 + }, + { + "epoch": 0.69, + "learning_rate": 4.374951604409473e-05, + "loss": 0.8748, + "step": 12905 + }, + { + "epoch": 0.69, + "learning_rate": 4.374488311924958e-05, + "loss": 0.7128, + "step": 12910 + }, + { + "epoch": 0.69, + "learning_rate": 4.3740248723554256e-05, + "loss": 0.7926, + "step": 12915 + }, + { + "epoch": 0.69, + "learning_rate": 4.37356128573724e-05, + "loss": 0.7336, + "step": 12920 + }, + { + "epoch": 0.69, + "learning_rate": 4.3730975521067805e-05, + "loss": 0.679, + "step": 12925 + }, + { + "epoch": 0.69, + "learning_rate": 4.372633671500431e-05, + "loss": 0.7649, + "step": 12930 + }, + { + "epoch": 0.69, + "learning_rate": 4.3721696439545936e-05, + "loss": 0.8468, + "step": 12935 + }, + { + "epoch": 0.69, + "learning_rate": 4.371705469505678e-05, + "loss": 0.6405, + "step": 12940 + }, + { + "epoch": 0.69, + "learning_rate": 4.371241148190107e-05, + "loss": 0.703, + "step": 12945 + }, + { + "epoch": 0.69, + "learning_rate": 4.370776680044313e-05, + "loss": 0.7934, + "step": 12950 + }, + { + "epoch": 0.69, + "learning_rate": 4.370312065104741e-05, + "loss": 0.9499, + "step": 12955 + }, + { + "epoch": 0.69, + "learning_rate": 4.369847303407851e-05, + "loss": 0.8789, + "step": 12960 + }, + { + "epoch": 0.69, + "learning_rate": 4.369382394990108e-05, + "loss": 0.8279, + "step": 12965 + }, + { + "epoch": 0.69, + "learning_rate": 4.368917339887993e-05, + "loss": 0.8488, + "step": 12970 + }, + { + "epoch": 0.69, + "learning_rate": 4.368452138137998e-05, + "loss": 0.8183, + "step": 12975 + }, + { + "epoch": 0.69, + "learning_rate": 4.3679867897766255e-05, + "loss": 0.888, + "step": 12980 + }, + { + "epoch": 0.69, + "learning_rate": 4.3675212948403896e-05, + "loss": 0.7248, + "step": 12985 + }, + { + "epoch": 0.69, + "learning_rate": 4.367055653365816e-05, + "loss": 0.9176, + "step": 12990 + }, + { + "epoch": 0.7, + "learning_rate": 4.366589865389443e-05, + "loss": 0.8361, + "step": 12995 + }, + { + "epoch": 0.7, + "learning_rate": 4.3661239309478186e-05, + "loss": 0.7936, + "step": 13000 + }, + { + "epoch": 0.7, + "learning_rate": 4.365657850077504e-05, + "loss": 0.7791, + "step": 13005 + }, + { + "epoch": 0.7, + "learning_rate": 4.3651916228150694e-05, + "loss": 0.7412, + "step": 13010 + }, + { + "epoch": 0.7, + "learning_rate": 4.3647252491971e-05, + "loss": 0.8163, + "step": 13015 + }, + { + "epoch": 0.7, + "learning_rate": 4.3642587292601886e-05, + "loss": 0.778, + "step": 13020 + }, + { + "epoch": 0.7, + "learning_rate": 4.363792063040945e-05, + "loss": 0.8507, + "step": 13025 + }, + { + "epoch": 0.7, + "learning_rate": 4.3633252505759825e-05, + "loss": 0.7303, + "step": 13030 + }, + { + "epoch": 0.7, + "learning_rate": 4.362858291901933e-05, + "loss": 0.8076, + "step": 13035 + }, + { + "epoch": 0.7, + "learning_rate": 4.362391187055438e-05, + "loss": 0.8979, + "step": 13040 + }, + { + "epoch": 0.7, + "learning_rate": 4.361923936073148e-05, + "loss": 0.7571, + "step": 13045 + }, + { + "epoch": 0.7, + "learning_rate": 4.361456538991727e-05, + "loss": 0.6578, + "step": 13050 + }, + { + "epoch": 0.7, + "learning_rate": 4.36098899584785e-05, + "loss": 0.6153, + "step": 13055 + }, + { + "epoch": 0.7, + "learning_rate": 4.3605213066782045e-05, + "loss": 0.7902, + "step": 13060 + }, + { + "epoch": 0.7, + "learning_rate": 4.360053471519489e-05, + "loss": 0.7363, + "step": 13065 + }, + { + "epoch": 0.7, + "learning_rate": 4.35958549040841e-05, + "loss": 0.8214, + "step": 13070 + }, + { + "epoch": 0.7, + "learning_rate": 4.359117363381691e-05, + "loss": 0.7418, + "step": 13075 + }, + { + "epoch": 0.7, + "learning_rate": 4.358649090476065e-05, + "loss": 0.7491, + "step": 13080 + }, + { + "epoch": 0.7, + "learning_rate": 4.358180671728274e-05, + "loss": 0.7573, + "step": 13085 + }, + { + "epoch": 0.7, + "learning_rate": 4.357712107175073e-05, + "loss": 0.6123, + "step": 13090 + }, + { + "epoch": 0.7, + "learning_rate": 4.357243396853231e-05, + "loss": 0.6387, + "step": 13095 + }, + { + "epoch": 0.7, + "learning_rate": 4.356774540799524e-05, + "loss": 0.8532, + "step": 13100 + }, + { + "epoch": 0.7, + "learning_rate": 4.356305539050744e-05, + "loss": 0.7031, + "step": 13105 + }, + { + "epoch": 0.7, + "learning_rate": 4.3558363916436894e-05, + "loss": 0.6158, + "step": 13110 + }, + { + "epoch": 0.7, + "learning_rate": 4.355367098615174e-05, + "loss": 0.7412, + "step": 13115 + }, + { + "epoch": 0.7, + "learning_rate": 4.354897660002022e-05, + "loss": 0.7006, + "step": 13120 + }, + { + "epoch": 0.7, + "learning_rate": 4.3544280758410676e-05, + "loss": 0.7137, + "step": 13125 + }, + { + "epoch": 0.7, + "learning_rate": 4.353958346169159e-05, + "loss": 0.854, + "step": 13130 + }, + { + "epoch": 0.7, + "learning_rate": 4.353488471023153e-05, + "loss": 0.8121, + "step": 13135 + }, + { + "epoch": 0.7, + "learning_rate": 4.3530184504399204e-05, + "loss": 0.5475, + "step": 13140 + }, + { + "epoch": 0.7, + "learning_rate": 4.352548284456341e-05, + "loss": 0.7884, + "step": 13145 + }, + { + "epoch": 0.7, + "learning_rate": 4.3520779731093084e-05, + "loss": 0.6686, + "step": 13150 + }, + { + "epoch": 0.7, + "learning_rate": 4.351607516435725e-05, + "loss": 0.8095, + "step": 13155 + }, + { + "epoch": 0.7, + "learning_rate": 4.3511369144725066e-05, + "loss": 0.6401, + "step": 13160 + }, + { + "epoch": 0.7, + "learning_rate": 4.350666167256581e-05, + "loss": 0.8313, + "step": 13165 + }, + { + "epoch": 0.7, + "learning_rate": 4.350195274824885e-05, + "loss": 0.7963, + "step": 13170 + }, + { + "epoch": 0.7, + "learning_rate": 4.349724237214368e-05, + "loss": 0.6575, + "step": 13175 + }, + { + "epoch": 0.71, + "learning_rate": 4.349253054461991e-05, + "loss": 0.8806, + "step": 13180 + }, + { + "epoch": 0.71, + "learning_rate": 4.348781726604726e-05, + "loss": 0.7534, + "step": 13185 + }, + { + "epoch": 0.71, + "learning_rate": 4.3483102536795566e-05, + "loss": 0.8731, + "step": 13190 + }, + { + "epoch": 0.71, + "learning_rate": 4.3478386357234786e-05, + "loss": 0.7056, + "step": 13195 + }, + { + "epoch": 0.71, + "learning_rate": 4.3473668727734966e-05, + "loss": 0.9734, + "step": 13200 + }, + { + "epoch": 0.71, + "learning_rate": 4.34689496486663e-05, + "loss": 0.833, + "step": 13205 + }, + { + "epoch": 0.71, + "learning_rate": 4.3464229120399075e-05, + "loss": 0.7495, + "step": 13210 + }, + { + "epoch": 0.71, + "learning_rate": 4.345950714330369e-05, + "loss": 0.7892, + "step": 13215 + }, + { + "epoch": 0.71, + "learning_rate": 4.3454783717750664e-05, + "loss": 0.7466, + "step": 13220 + }, + { + "epoch": 0.71, + "learning_rate": 4.345005884411063e-05, + "loss": 0.7017, + "step": 13225 + }, + { + "epoch": 0.71, + "learning_rate": 4.3445332522754335e-05, + "loss": 0.907, + "step": 13230 + }, + { + "epoch": 0.71, + "learning_rate": 4.344060475405264e-05, + "loss": 0.6775, + "step": 13235 + }, + { + "epoch": 0.71, + "learning_rate": 4.343587553837651e-05, + "loss": 0.7486, + "step": 13240 + }, + { + "epoch": 0.71, + "learning_rate": 4.343114487609704e-05, + "loss": 0.7794, + "step": 13245 + }, + { + "epoch": 0.71, + "learning_rate": 4.342641276758543e-05, + "loss": 0.7655, + "step": 13250 + }, + { + "epoch": 0.71, + "learning_rate": 4.3421679213212986e-05, + "loss": 0.8388, + "step": 13255 + }, + { + "epoch": 0.71, + "learning_rate": 4.341694421335114e-05, + "loss": 0.7333, + "step": 13260 + }, + { + "epoch": 0.71, + "learning_rate": 4.341220776837143e-05, + "loss": 0.9167, + "step": 13265 + }, + { + "epoch": 0.71, + "learning_rate": 4.34074698786455e-05, + "loss": 0.8251, + "step": 13270 + }, + { + "epoch": 0.71, + "learning_rate": 4.3402730544545135e-05, + "loss": 0.7719, + "step": 13275 + }, + { + "epoch": 0.71, + "learning_rate": 4.3397989766442204e-05, + "loss": 0.9258, + "step": 13280 + }, + { + "epoch": 0.71, + "learning_rate": 4.3393247544708706e-05, + "loss": 0.9782, + "step": 13285 + }, + { + "epoch": 0.71, + "learning_rate": 4.338850387971674e-05, + "loss": 0.7782, + "step": 13290 + }, + { + "epoch": 0.71, + "learning_rate": 4.3383758771838534e-05, + "loss": 0.8658, + "step": 13295 + }, + { + "epoch": 0.71, + "learning_rate": 4.3379012221446415e-05, + "loss": 0.6974, + "step": 13300 + }, + { + "epoch": 0.71, + "learning_rate": 4.337426422891283e-05, + "loss": 0.7487, + "step": 13305 + }, + { + "epoch": 0.71, + "learning_rate": 4.336951479461035e-05, + "loss": 0.7499, + "step": 13310 + }, + { + "epoch": 0.71, + "learning_rate": 4.3364763918911635e-05, + "loss": 0.8276, + "step": 13315 + }, + { + "epoch": 0.71, + "learning_rate": 4.336001160218947e-05, + "loss": 0.8311, + "step": 13320 + }, + { + "epoch": 0.71, + "learning_rate": 4.335525784481676e-05, + "loss": 0.7503, + "step": 13325 + }, + { + "epoch": 0.71, + "learning_rate": 4.335050264716652e-05, + "loss": 0.74, + "step": 13330 + }, + { + "epoch": 0.71, + "learning_rate": 4.3345746009611865e-05, + "loss": 0.6921, + "step": 13335 + }, + { + "epoch": 0.71, + "learning_rate": 4.334098793252604e-05, + "loss": 0.8065, + "step": 13340 + }, + { + "epoch": 0.71, + "learning_rate": 4.333622841628239e-05, + "loss": 0.8301, + "step": 13345 + }, + { + "epoch": 0.71, + "learning_rate": 4.3331467461254387e-05, + "loss": 0.7791, + "step": 13350 + }, + { + "epoch": 0.71, + "learning_rate": 4.3326705067815606e-05, + "loss": 0.6827, + "step": 13355 + }, + { + "epoch": 0.71, + "learning_rate": 4.3321941236339724e-05, + "loss": 0.658, + "step": 13360 + }, + { + "epoch": 0.71, + "learning_rate": 4.331717596720056e-05, + "loss": 0.7526, + "step": 13365 + }, + { + "epoch": 0.72, + "learning_rate": 4.331240926077202e-05, + "loss": 0.8567, + "step": 13370 + }, + { + "epoch": 0.72, + "learning_rate": 4.3307641117428146e-05, + "loss": 0.8012, + "step": 13375 + }, + { + "epoch": 0.72, + "learning_rate": 4.3302871537543055e-05, + "loss": 0.7934, + "step": 13380 + }, + { + "epoch": 0.72, + "learning_rate": 4.329810052149101e-05, + "loss": 0.5949, + "step": 13385 + }, + { + "epoch": 0.72, + "learning_rate": 4.329332806964639e-05, + "loss": 0.6297, + "step": 13390 + }, + { + "epoch": 0.72, + "learning_rate": 4.328855418238366e-05, + "loss": 0.8759, + "step": 13395 + }, + { + "epoch": 0.72, + "learning_rate": 4.328377886007742e-05, + "loss": 0.6256, + "step": 13400 + }, + { + "epoch": 0.72, + "learning_rate": 4.3279002103102364e-05, + "loss": 0.8063, + "step": 13405 + }, + { + "epoch": 0.72, + "learning_rate": 4.327422391183331e-05, + "loss": 0.7435, + "step": 13410 + }, + { + "epoch": 0.72, + "learning_rate": 4.3269444286645195e-05, + "loss": 0.8356, + "step": 13415 + }, + { + "epoch": 0.72, + "learning_rate": 4.326466322791306e-05, + "loss": 0.8603, + "step": 13420 + }, + { + "epoch": 0.72, + "learning_rate": 4.325988073601205e-05, + "loss": 0.7352, + "step": 13425 + }, + { + "epoch": 0.72, + "learning_rate": 4.325509681131744e-05, + "loss": 0.7229, + "step": 13430 + }, + { + "epoch": 0.72, + "learning_rate": 4.3250311454204604e-05, + "loss": 0.7984, + "step": 13435 + }, + { + "epoch": 0.72, + "learning_rate": 4.324552466504904e-05, + "loss": 0.854, + "step": 13440 + }, + { + "epoch": 0.72, + "learning_rate": 4.324073644422635e-05, + "loss": 0.9319, + "step": 13445 + }, + { + "epoch": 0.72, + "learning_rate": 4.323594679211224e-05, + "loss": 0.7814, + "step": 13450 + }, + { + "epoch": 0.72, + "learning_rate": 4.323115570908255e-05, + "loss": 0.7654, + "step": 13455 + }, + { + "epoch": 0.72, + "learning_rate": 4.322636319551322e-05, + "loss": 0.7388, + "step": 13460 + }, + { + "epoch": 0.72, + "learning_rate": 4.3221569251780294e-05, + "loss": 0.9102, + "step": 13465 + }, + { + "epoch": 0.72, + "learning_rate": 4.321677387825995e-05, + "loss": 0.7954, + "step": 13470 + }, + { + "epoch": 0.72, + "learning_rate": 4.321197707532846e-05, + "loss": 0.7261, + "step": 13475 + }, + { + "epoch": 0.72, + "learning_rate": 4.3207178843362206e-05, + "loss": 0.8319, + "step": 13480 + }, + { + "epoch": 0.72, + "learning_rate": 4.3202379182737696e-05, + "loss": 0.812, + "step": 13485 + }, + { + "epoch": 0.72, + "learning_rate": 4.3197578093831546e-05, + "loss": 0.6999, + "step": 13490 + }, + { + "epoch": 0.72, + "learning_rate": 4.319277557702048e-05, + "loss": 0.7062, + "step": 13495 + }, + { + "epoch": 0.72, + "learning_rate": 4.3187971632681334e-05, + "loss": 0.7148, + "step": 13500 + }, + { + "epoch": 0.72, + "learning_rate": 4.318316626119106e-05, + "loss": 0.7924, + "step": 13505 + }, + { + "epoch": 0.72, + "learning_rate": 4.317835946292673e-05, + "loss": 0.8971, + "step": 13510 + }, + { + "epoch": 0.72, + "learning_rate": 4.31735512382655e-05, + "loss": 0.8842, + "step": 13515 + }, + { + "epoch": 0.72, + "learning_rate": 4.316874158758465e-05, + "loss": 0.7511, + "step": 13520 + }, + { + "epoch": 0.72, + "learning_rate": 4.316393051126161e-05, + "loss": 0.826, + "step": 13525 + }, + { + "epoch": 0.72, + "learning_rate": 4.315911800967386e-05, + "loss": 0.7259, + "step": 13530 + }, + { + "epoch": 0.72, + "learning_rate": 4.315430408319903e-05, + "loss": 0.7994, + "step": 13535 + }, + { + "epoch": 0.72, + "learning_rate": 4.314948873221486e-05, + "loss": 0.7357, + "step": 13540 + }, + { + "epoch": 0.72, + "learning_rate": 4.3144671957099195e-05, + "loss": 0.8327, + "step": 13545 + }, + { + "epoch": 0.72, + "learning_rate": 4.3139853758229974e-05, + "loss": 0.7013, + "step": 13550 + }, + { + "epoch": 0.73, + "learning_rate": 4.3135034135985284e-05, + "loss": 0.7645, + "step": 13555 + }, + { + "epoch": 0.73, + "learning_rate": 4.31302130907433e-05, + "loss": 0.7555, + "step": 13560 + }, + { + "epoch": 0.73, + "learning_rate": 4.312539062288232e-05, + "loss": 0.6446, + "step": 13565 + }, + { + "epoch": 0.73, + "learning_rate": 4.3120566732780734e-05, + "loss": 0.9196, + "step": 13570 + }, + { + "epoch": 0.73, + "learning_rate": 4.311574142081706e-05, + "loss": 0.7571, + "step": 13575 + }, + { + "epoch": 0.73, + "learning_rate": 4.311091468736993e-05, + "loss": 0.8022, + "step": 13580 + }, + { + "epoch": 0.73, + "learning_rate": 4.310608653281807e-05, + "loss": 0.7824, + "step": 13585 + }, + { + "epoch": 0.73, + "learning_rate": 4.310125695754036e-05, + "loss": 0.7804, + "step": 13590 + }, + { + "epoch": 0.73, + "learning_rate": 4.3096425961915726e-05, + "loss": 0.9084, + "step": 13595 + }, + { + "epoch": 0.73, + "learning_rate": 4.309159354632326e-05, + "loss": 0.7308, + "step": 13600 + }, + { + "epoch": 0.73, + "learning_rate": 4.308675971114215e-05, + "loss": 0.8479, + "step": 13605 + }, + { + "epoch": 0.73, + "learning_rate": 4.3081924456751665e-05, + "loss": 0.7876, + "step": 13610 + }, + { + "epoch": 0.73, + "learning_rate": 4.307708778353124e-05, + "loss": 0.7154, + "step": 13615 + }, + { + "epoch": 0.73, + "learning_rate": 4.307224969186038e-05, + "loss": 0.7687, + "step": 13620 + }, + { + "epoch": 0.73, + "learning_rate": 4.306741018211872e-05, + "loss": 0.8058, + "step": 13625 + }, + { + "epoch": 0.73, + "learning_rate": 4.3062569254685994e-05, + "loss": 0.8055, + "step": 13630 + }, + { + "epoch": 0.73, + "learning_rate": 4.3057726909942054e-05, + "loss": 0.7865, + "step": 13635 + }, + { + "epoch": 0.73, + "learning_rate": 4.305288314826688e-05, + "loss": 0.8011, + "step": 13640 + }, + { + "epoch": 0.73, + "learning_rate": 4.3048037970040515e-05, + "loss": 0.6767, + "step": 13645 + }, + { + "epoch": 0.73, + "learning_rate": 4.304319137564318e-05, + "loss": 0.9095, + "step": 13650 + }, + { + "epoch": 0.73, + "learning_rate": 4.303834336545514e-05, + "loss": 0.6939, + "step": 13655 + }, + { + "epoch": 0.73, + "learning_rate": 4.3033493939856816e-05, + "loss": 0.7084, + "step": 13660 + }, + { + "epoch": 0.73, + "learning_rate": 4.302864309922874e-05, + "loss": 0.8821, + "step": 13665 + }, + { + "epoch": 0.73, + "learning_rate": 4.3023790843951515e-05, + "loss": 0.7774, + "step": 13670 + }, + { + "epoch": 0.73, + "learning_rate": 4.30189371744059e-05, + "loss": 0.8113, + "step": 13675 + }, + { + "epoch": 0.73, + "learning_rate": 4.301408209097274e-05, + "loss": 0.7243, + "step": 13680 + }, + { + "epoch": 0.73, + "learning_rate": 4.300922559403301e-05, + "loss": 0.7637, + "step": 13685 + }, + { + "epoch": 0.73, + "learning_rate": 4.300436768396776e-05, + "loss": 0.8162, + "step": 13690 + }, + { + "epoch": 0.73, + "learning_rate": 4.299950836115819e-05, + "loss": 0.7728, + "step": 13695 + }, + { + "epoch": 0.73, + "learning_rate": 4.29946476259856e-05, + "loss": 0.7161, + "step": 13700 + }, + { + "epoch": 0.73, + "learning_rate": 4.298978547883138e-05, + "loss": 0.7888, + "step": 13705 + }, + { + "epoch": 0.73, + "learning_rate": 4.298492192007707e-05, + "loss": 0.8428, + "step": 13710 + }, + { + "epoch": 0.73, + "learning_rate": 4.298005695010427e-05, + "loss": 0.7264, + "step": 13715 + }, + { + "epoch": 0.73, + "learning_rate": 4.297519056929474e-05, + "loss": 0.6785, + "step": 13720 + }, + { + "epoch": 0.73, + "learning_rate": 4.297032277803032e-05, + "loss": 0.7387, + "step": 13725 + }, + { + "epoch": 0.73, + "learning_rate": 4.296545357669297e-05, + "loss": 0.8192, + "step": 13730 + }, + { + "epoch": 0.73, + "learning_rate": 4.296058296566476e-05, + "loss": 0.682, + "step": 13735 + }, + { + "epoch": 0.74, + "learning_rate": 4.2955710945327875e-05, + "loss": 0.7063, + "step": 13740 + }, + { + "epoch": 0.74, + "learning_rate": 4.2950837516064605e-05, + "loss": 0.8201, + "step": 13745 + }, + { + "epoch": 0.74, + "learning_rate": 4.2945962678257344e-05, + "loss": 0.9147, + "step": 13750 + }, + { + "epoch": 0.74, + "learning_rate": 4.294108643228862e-05, + "loss": 0.8736, + "step": 13755 + }, + { + "epoch": 0.74, + "learning_rate": 4.293620877854104e-05, + "loss": 0.8974, + "step": 13760 + }, + { + "epoch": 0.74, + "learning_rate": 4.293132971739736e-05, + "loss": 0.7415, + "step": 13765 + }, + { + "epoch": 0.74, + "learning_rate": 4.29264492492404e-05, + "loss": 0.8989, + "step": 13770 + }, + { + "epoch": 0.74, + "learning_rate": 4.292156737445312e-05, + "loss": 0.7945, + "step": 13775 + }, + { + "epoch": 0.74, + "learning_rate": 4.2916684093418605e-05, + "loss": 0.6267, + "step": 13780 + }, + { + "epoch": 0.74, + "learning_rate": 4.291179940652e-05, + "loss": 0.8909, + "step": 13785 + }, + { + "epoch": 0.74, + "learning_rate": 4.290691331414061e-05, + "loss": 0.7708, + "step": 13790 + }, + { + "epoch": 0.74, + "learning_rate": 4.2902025816663826e-05, + "loss": 0.9017, + "step": 13795 + }, + { + "epoch": 0.74, + "learning_rate": 4.289713691447316e-05, + "loss": 0.7676, + "step": 13800 + }, + { + "epoch": 0.74, + "learning_rate": 4.2892246607952214e-05, + "loss": 0.8595, + "step": 13805 + }, + { + "epoch": 0.74, + "learning_rate": 4.288735489748473e-05, + "loss": 0.7769, + "step": 13810 + }, + { + "epoch": 0.74, + "learning_rate": 4.2882461783454534e-05, + "loss": 0.8757, + "step": 13815 + }, + { + "epoch": 0.74, + "learning_rate": 4.287756726624557e-05, + "loss": 0.7026, + "step": 13820 + }, + { + "epoch": 0.74, + "learning_rate": 4.287267134624191e-05, + "loss": 0.7234, + "step": 13825 + }, + { + "epoch": 0.74, + "learning_rate": 4.2867774023827706e-05, + "loss": 0.6693, + "step": 13830 + }, + { + "epoch": 0.74, + "learning_rate": 4.286287529938724e-05, + "loss": 0.7171, + "step": 13835 + }, + { + "epoch": 0.74, + "learning_rate": 4.2857975173304906e-05, + "loss": 0.6785, + "step": 13840 + }, + { + "epoch": 0.74, + "learning_rate": 4.28530736459652e-05, + "loss": 0.7787, + "step": 13845 + }, + { + "epoch": 0.74, + "learning_rate": 4.284817071775271e-05, + "loss": 0.7244, + "step": 13850 + }, + { + "epoch": 0.74, + "learning_rate": 4.284326638905218e-05, + "loss": 0.8608, + "step": 13855 + }, + { + "epoch": 0.74, + "learning_rate": 4.283836066024841e-05, + "loss": 0.7805, + "step": 13860 + }, + { + "epoch": 0.74, + "learning_rate": 4.283345353172636e-05, + "loss": 0.8232, + "step": 13865 + }, + { + "epoch": 0.74, + "learning_rate": 4.282854500387107e-05, + "loss": 0.7503, + "step": 13870 + }, + { + "epoch": 0.74, + "learning_rate": 4.282363507706769e-05, + "loss": 0.78, + "step": 13875 + }, + { + "epoch": 0.74, + "learning_rate": 4.281872375170148e-05, + "loss": 0.7393, + "step": 13880 + }, + { + "epoch": 0.74, + "learning_rate": 4.281381102815784e-05, + "loss": 0.674, + "step": 13885 + }, + { + "epoch": 0.74, + "learning_rate": 4.280889690682223e-05, + "loss": 0.806, + "step": 13890 + }, + { + "epoch": 0.74, + "learning_rate": 4.2803981388080254e-05, + "loss": 0.7558, + "step": 13895 + }, + { + "epoch": 0.74, + "learning_rate": 4.279906447231763e-05, + "loss": 0.8335, + "step": 13900 + }, + { + "epoch": 0.74, + "learning_rate": 4.279414615992014e-05, + "loss": 0.7716, + "step": 13905 + }, + { + "epoch": 0.74, + "learning_rate": 4.278922645127375e-05, + "loss": 0.9745, + "step": 13910 + }, + { + "epoch": 0.74, + "learning_rate": 4.2784305346764464e-05, + "loss": 0.8848, + "step": 13915 + }, + { + "epoch": 0.74, + "learning_rate": 4.2779382846778434e-05, + "loss": 0.8083, + "step": 13920 + }, + { + "epoch": 0.74, + "learning_rate": 4.277445895170191e-05, + "loss": 0.7511, + "step": 13925 + }, + { + "epoch": 0.75, + "learning_rate": 4.276953366192126e-05, + "loss": 0.9569, + "step": 13930 + }, + { + "epoch": 0.75, + "learning_rate": 4.276460697782295e-05, + "loss": 0.832, + "step": 13935 + }, + { + "epoch": 0.75, + "learning_rate": 4.275967889979356e-05, + "loss": 0.7719, + "step": 13940 + }, + { + "epoch": 0.75, + "learning_rate": 4.275474942821978e-05, + "loss": 0.8437, + "step": 13945 + }, + { + "epoch": 0.75, + "learning_rate": 4.274981856348842e-05, + "loss": 0.8975, + "step": 13950 + }, + { + "epoch": 0.75, + "learning_rate": 4.2744886305986376e-05, + "loss": 0.77, + "step": 13955 + }, + { + "epoch": 0.75, + "learning_rate": 4.273995265610068e-05, + "loss": 0.8018, + "step": 13960 + }, + { + "epoch": 0.75, + "learning_rate": 4.2735017614218444e-05, + "loss": 0.6188, + "step": 13965 + }, + { + "epoch": 0.75, + "learning_rate": 4.273008118072691e-05, + "loss": 0.8172, + "step": 13970 + }, + { + "epoch": 0.75, + "learning_rate": 4.272514335601343e-05, + "loss": 0.8165, + "step": 13975 + }, + { + "epoch": 0.75, + "learning_rate": 4.272020414046546e-05, + "loss": 0.7788, + "step": 13980 + }, + { + "epoch": 0.75, + "learning_rate": 4.2715263534470545e-05, + "loss": 0.8823, + "step": 13985 + }, + { + "epoch": 0.75, + "learning_rate": 4.271032153841638e-05, + "loss": 0.8042, + "step": 13990 + }, + { + "epoch": 0.75, + "learning_rate": 4.2705378152690746e-05, + "loss": 0.7676, + "step": 13995 + }, + { + "epoch": 0.75, + "learning_rate": 4.2700433377681514e-05, + "loss": 0.6615, + "step": 14000 + }, + { + "epoch": 0.75, + "learning_rate": 4.2695487213776705e-05, + "loss": 0.8033, + "step": 14005 + }, + { + "epoch": 0.75, + "learning_rate": 4.269053966136443e-05, + "loss": 0.7983, + "step": 14010 + }, + { + "epoch": 0.75, + "learning_rate": 4.268559072083289e-05, + "loss": 0.7349, + "step": 14015 + }, + { + "epoch": 0.75, + "learning_rate": 4.2680640392570425e-05, + "loss": 0.9242, + "step": 14020 + }, + { + "epoch": 0.75, + "learning_rate": 4.267568867696548e-05, + "loss": 0.891, + "step": 14025 + }, + { + "epoch": 0.75, + "learning_rate": 4.267073557440657e-05, + "loss": 0.8082, + "step": 14030 + }, + { + "epoch": 0.75, + "learning_rate": 4.2665781085282376e-05, + "loss": 0.8487, + "step": 14035 + }, + { + "epoch": 0.75, + "learning_rate": 4.266082520998165e-05, + "loss": 0.8148, + "step": 14040 + }, + { + "epoch": 0.75, + "learning_rate": 4.265586794889327e-05, + "loss": 0.6315, + "step": 14045 + }, + { + "epoch": 0.75, + "learning_rate": 4.26509093024062e-05, + "loss": 0.6232, + "step": 14050 + }, + { + "epoch": 0.75, + "learning_rate": 4.264594927090955e-05, + "loss": 0.7678, + "step": 14055 + }, + { + "epoch": 0.75, + "learning_rate": 4.26409878547925e-05, + "loss": 0.7228, + "step": 14060 + }, + { + "epoch": 0.75, + "learning_rate": 4.263602505444437e-05, + "loss": 0.6994, + "step": 14065 + }, + { + "epoch": 0.75, + "learning_rate": 4.263106087025456e-05, + "loss": 0.7507, + "step": 14070 + }, + { + "epoch": 0.75, + "learning_rate": 4.262609530261262e-05, + "loss": 0.6821, + "step": 14075 + }, + { + "epoch": 0.75, + "learning_rate": 4.262112835190815e-05, + "loss": 0.6878, + "step": 14080 + }, + { + "epoch": 0.75, + "learning_rate": 4.26161600185309e-05, + "loss": 0.6651, + "step": 14085 + }, + { + "epoch": 0.75, + "learning_rate": 4.261119030287074e-05, + "loss": 0.9296, + "step": 14090 + }, + { + "epoch": 0.75, + "learning_rate": 4.2606219205317606e-05, + "loss": 0.6205, + "step": 14095 + }, + { + "epoch": 0.75, + "learning_rate": 4.260124672626156e-05, + "loss": 0.646, + "step": 14100 + }, + { + "epoch": 0.75, + "learning_rate": 4.25962728660928e-05, + "loss": 0.7434, + "step": 14105 + }, + { + "epoch": 0.75, + "learning_rate": 4.259129762520159e-05, + "loss": 0.8019, + "step": 14110 + }, + { + "epoch": 0.76, + "learning_rate": 4.258632100397831e-05, + "loss": 0.6602, + "step": 14115 + }, + { + "epoch": 0.76, + "learning_rate": 4.258134300281349e-05, + "loss": 0.6979, + "step": 14120 + }, + { + "epoch": 0.76, + "learning_rate": 4.257636362209772e-05, + "loss": 0.7679, + "step": 14125 + }, + { + "epoch": 0.76, + "learning_rate": 4.257138286222172e-05, + "loss": 0.6943, + "step": 14130 + }, + { + "epoch": 0.76, + "learning_rate": 4.25664007235763e-05, + "loss": 0.6629, + "step": 14135 + }, + { + "epoch": 0.76, + "learning_rate": 4.2561417206552415e-05, + "loss": 0.8641, + "step": 14140 + }, + { + "epoch": 0.76, + "learning_rate": 4.2556432311541095e-05, + "loss": 0.7435, + "step": 14145 + }, + { + "epoch": 0.76, + "learning_rate": 4.255144603893348e-05, + "loss": 0.7812, + "step": 14150 + }, + { + "epoch": 0.76, + "learning_rate": 4.2546458389120846e-05, + "loss": 0.7806, + "step": 14155 + }, + { + "epoch": 0.76, + "learning_rate": 4.254146936249455e-05, + "loss": 0.7192, + "step": 14160 + }, + { + "epoch": 0.76, + "learning_rate": 4.2536478959446046e-05, + "loss": 0.8489, + "step": 14165 + }, + { + "epoch": 0.76, + "learning_rate": 4.2531487180366934e-05, + "loss": 0.9311, + "step": 14170 + }, + { + "epoch": 0.76, + "learning_rate": 4.25264940256489e-05, + "loss": 0.6379, + "step": 14175 + }, + { + "epoch": 0.76, + "learning_rate": 4.252149949568374e-05, + "loss": 0.8005, + "step": 14180 + }, + { + "epoch": 0.76, + "learning_rate": 4.251650359086336e-05, + "loss": 0.8579, + "step": 14185 + }, + { + "epoch": 0.76, + "learning_rate": 4.251150631157977e-05, + "loss": 0.7688, + "step": 14190 + }, + { + "epoch": 0.76, + "learning_rate": 4.250650765822509e-05, + "loss": 0.8505, + "step": 14195 + }, + { + "epoch": 0.76, + "learning_rate": 4.250150763119155e-05, + "loss": 0.7682, + "step": 14200 + }, + { + "epoch": 0.76, + "learning_rate": 4.249650623087148e-05, + "loss": 0.6978, + "step": 14205 + }, + { + "epoch": 0.76, + "learning_rate": 4.2491503457657335e-05, + "loss": 0.7054, + "step": 14210 + }, + { + "epoch": 0.76, + "learning_rate": 4.248649931194165e-05, + "loss": 0.8373, + "step": 14215 + }, + { + "epoch": 0.76, + "learning_rate": 4.24814937941171e-05, + "loss": 0.784, + "step": 14220 + }, + { + "epoch": 0.76, + "learning_rate": 4.247648690457645e-05, + "loss": 0.7168, + "step": 14225 + }, + { + "epoch": 0.76, + "learning_rate": 4.247147864371256e-05, + "loss": 0.7629, + "step": 14230 + }, + { + "epoch": 0.76, + "learning_rate": 4.246646901191843e-05, + "loss": 0.7039, + "step": 14235 + }, + { + "epoch": 0.76, + "learning_rate": 4.246145800958714e-05, + "loss": 0.8141, + "step": 14240 + }, + { + "epoch": 0.76, + "learning_rate": 4.245644563711189e-05, + "loss": 0.9522, + "step": 14245 + }, + { + "epoch": 0.76, + "learning_rate": 4.245143189488598e-05, + "loss": 0.8157, + "step": 14250 + }, + { + "epoch": 0.76, + "learning_rate": 4.244641678330282e-05, + "loss": 0.8473, + "step": 14255 + }, + { + "epoch": 0.76, + "learning_rate": 4.2441400302755945e-05, + "loss": 0.711, + "step": 14260 + }, + { + "epoch": 0.76, + "learning_rate": 4.243638245363897e-05, + "loss": 0.7445, + "step": 14265 + }, + { + "epoch": 0.76, + "learning_rate": 4.2431363236345625e-05, + "loss": 0.7772, + "step": 14270 + }, + { + "epoch": 0.76, + "learning_rate": 4.242634265126977e-05, + "loss": 0.7294, + "step": 14275 + }, + { + "epoch": 0.76, + "learning_rate": 4.242132069880533e-05, + "loss": 0.6996, + "step": 14280 + }, + { + "epoch": 0.76, + "learning_rate": 4.2416297379346376e-05, + "loss": 0.6972, + "step": 14285 + }, + { + "epoch": 0.76, + "learning_rate": 4.2411272693287064e-05, + "loss": 0.7665, + "step": 14290 + }, + { + "epoch": 0.76, + "learning_rate": 4.240624664102167e-05, + "loss": 0.739, + "step": 14295 + }, + { + "epoch": 0.77, + "learning_rate": 4.240121922294459e-05, + "loss": 0.7974, + "step": 14300 + }, + { + "epoch": 0.77, + "learning_rate": 4.239619043945027e-05, + "loss": 0.9701, + "step": 14305 + }, + { + "epoch": 0.77, + "learning_rate": 4.239116029093333e-05, + "loss": 0.9068, + "step": 14310 + }, + { + "epoch": 0.77, + "learning_rate": 4.2386128777788465e-05, + "loss": 0.947, + "step": 14315 + }, + { + "epoch": 0.77, + "learning_rate": 4.238109590041047e-05, + "loss": 0.6394, + "step": 14320 + }, + { + "epoch": 0.77, + "learning_rate": 4.237606165919428e-05, + "loss": 0.6713, + "step": 14325 + }, + { + "epoch": 0.77, + "learning_rate": 4.2371026054534904e-05, + "loss": 0.6775, + "step": 14330 + }, + { + "epoch": 0.77, + "learning_rate": 4.2365989086827454e-05, + "loss": 0.8097, + "step": 14335 + }, + { + "epoch": 0.77, + "learning_rate": 4.236095075646719e-05, + "loss": 0.8135, + "step": 14340 + }, + { + "epoch": 0.77, + "learning_rate": 4.235591106384944e-05, + "loss": 0.7608, + "step": 14345 + }, + { + "epoch": 0.77, + "learning_rate": 4.2350870009369654e-05, + "loss": 0.8754, + "step": 14350 + }, + { + "epoch": 0.77, + "learning_rate": 4.234582759342339e-05, + "loss": 0.8942, + "step": 14355 + }, + { + "epoch": 0.77, + "learning_rate": 4.234078381640631e-05, + "loss": 0.7907, + "step": 14360 + }, + { + "epoch": 0.77, + "learning_rate": 4.233573867871418e-05, + "loss": 0.7991, + "step": 14365 + }, + { + "epoch": 0.77, + "learning_rate": 4.233069218074287e-05, + "loss": 0.7618, + "step": 14370 + }, + { + "epoch": 0.77, + "learning_rate": 4.232564432288838e-05, + "loss": 0.7194, + "step": 14375 + }, + { + "epoch": 0.77, + "learning_rate": 4.232059510554678e-05, + "loss": 0.6555, + "step": 14380 + }, + { + "epoch": 0.77, + "learning_rate": 4.231554452911427e-05, + "loss": 0.6782, + "step": 14385 + }, + { + "epoch": 0.77, + "learning_rate": 4.231049259398716e-05, + "loss": 0.7596, + "step": 14390 + }, + { + "epoch": 0.77, + "learning_rate": 4.230543930056186e-05, + "loss": 0.76, + "step": 14395 + }, + { + "epoch": 0.77, + "learning_rate": 4.230038464923488e-05, + "loss": 0.6658, + "step": 14400 + }, + { + "epoch": 0.77, + "learning_rate": 4.2295328640402836e-05, + "loss": 0.8056, + "step": 14405 + }, + { + "epoch": 0.77, + "learning_rate": 4.2290271274462464e-05, + "loss": 0.6643, + "step": 14410 + }, + { + "epoch": 0.77, + "learning_rate": 4.2285212551810604e-05, + "loss": 0.7615, + "step": 14415 + }, + { + "epoch": 0.77, + "learning_rate": 4.2280152472844194e-05, + "loss": 0.8005, + "step": 14420 + }, + { + "epoch": 0.77, + "learning_rate": 4.2275091037960276e-05, + "loss": 0.7706, + "step": 14425 + }, + { + "epoch": 0.77, + "learning_rate": 4.2270028247556e-05, + "loss": 0.7255, + "step": 14430 + }, + { + "epoch": 0.77, + "learning_rate": 4.2264964102028646e-05, + "loss": 0.8158, + "step": 14435 + }, + { + "epoch": 0.77, + "learning_rate": 4.2259898601775567e-05, + "loss": 0.7915, + "step": 14440 + }, + { + "epoch": 0.77, + "learning_rate": 4.225483174719424e-05, + "loss": 1.0766, + "step": 14445 + }, + { + "epoch": 0.77, + "learning_rate": 4.224976353868224e-05, + "loss": 0.8752, + "step": 14450 + }, + { + "epoch": 0.77, + "learning_rate": 4.224469397663726e-05, + "loss": 0.7725, + "step": 14455 + }, + { + "epoch": 0.77, + "learning_rate": 4.223962306145709e-05, + "loss": 0.7454, + "step": 14460 + }, + { + "epoch": 0.77, + "learning_rate": 4.223455079353963e-05, + "loss": 0.7165, + "step": 14465 + }, + { + "epoch": 0.77, + "learning_rate": 4.222947717328287e-05, + "loss": 0.7469, + "step": 14470 + }, + { + "epoch": 0.77, + "learning_rate": 4.2224402201084945e-05, + "loss": 0.8593, + "step": 14475 + }, + { + "epoch": 0.77, + "learning_rate": 4.2219325877344054e-05, + "loss": 0.7903, + "step": 14480 + }, + { + "epoch": 0.77, + "learning_rate": 4.2214248202458524e-05, + "loss": 0.8492, + "step": 14485 + }, + { + "epoch": 0.78, + "learning_rate": 4.2209169176826785e-05, + "loss": 0.6709, + "step": 14490 + }, + { + "epoch": 0.78, + "learning_rate": 4.220408880084737e-05, + "loss": 0.7305, + "step": 14495 + }, + { + "epoch": 0.78, + "learning_rate": 4.219900707491892e-05, + "loss": 0.6952, + "step": 14500 + }, + { + "epoch": 0.78, + "learning_rate": 4.219392399944018e-05, + "loss": 0.7016, + "step": 14505 + }, + { + "epoch": 0.78, + "learning_rate": 4.2188839574810014e-05, + "loss": 0.9051, + "step": 14510 + }, + { + "epoch": 0.78, + "learning_rate": 4.2183753801427364e-05, + "loss": 0.8284, + "step": 14515 + }, + { + "epoch": 0.78, + "learning_rate": 4.217866667969129e-05, + "loss": 0.7009, + "step": 14520 + }, + { + "epoch": 0.78, + "learning_rate": 4.217357821000099e-05, + "loss": 0.7906, + "step": 14525 + }, + { + "epoch": 0.78, + "learning_rate": 4.2168488392755715e-05, + "loss": 0.7416, + "step": 14530 + }, + { + "epoch": 0.78, + "learning_rate": 4.216339722835486e-05, + "loss": 0.8503, + "step": 14535 + }, + { + "epoch": 0.78, + "learning_rate": 4.215830471719789e-05, + "loss": 0.8094, + "step": 14540 + }, + { + "epoch": 0.78, + "learning_rate": 4.215321085968443e-05, + "loss": 0.7569, + "step": 14545 + }, + { + "epoch": 0.78, + "learning_rate": 4.214811565621416e-05, + "loss": 0.7285, + "step": 14550 + }, + { + "epoch": 0.78, + "learning_rate": 4.214301910718688e-05, + "loss": 0.7024, + "step": 14555 + }, + { + "epoch": 0.78, + "learning_rate": 4.213792121300252e-05, + "loss": 0.6515, + "step": 14560 + }, + { + "epoch": 0.78, + "learning_rate": 4.2132821974061064e-05, + "loss": 0.7869, + "step": 14565 + }, + { + "epoch": 0.78, + "learning_rate": 4.212772139076266e-05, + "loss": 0.7572, + "step": 14570 + }, + { + "epoch": 0.78, + "learning_rate": 4.2122619463507516e-05, + "loss": 0.8004, + "step": 14575 + }, + { + "epoch": 0.78, + "learning_rate": 4.2117516192695986e-05, + "loss": 0.8827, + "step": 14580 + }, + { + "epoch": 0.78, + "learning_rate": 4.211241157872848e-05, + "loss": 0.8307, + "step": 14585 + }, + { + "epoch": 0.78, + "learning_rate": 4.210730562200557e-05, + "loss": 0.7506, + "step": 14590 + }, + { + "epoch": 0.78, + "learning_rate": 4.210219832292787e-05, + "loss": 0.7545, + "step": 14595 + }, + { + "epoch": 0.78, + "learning_rate": 4.209708968189615e-05, + "loss": 0.7133, + "step": 14600 + }, + { + "epoch": 0.78, + "learning_rate": 4.209197969931128e-05, + "loss": 0.8743, + "step": 14605 + }, + { + "epoch": 0.78, + "learning_rate": 4.208686837557421e-05, + "loss": 0.7308, + "step": 14610 + }, + { + "epoch": 0.78, + "learning_rate": 4.2081755711086014e-05, + "loss": 0.8478, + "step": 14615 + }, + { + "epoch": 0.78, + "learning_rate": 4.207664170624786e-05, + "loss": 0.9201, + "step": 14620 + }, + { + "epoch": 0.78, + "learning_rate": 4.2071526361461034e-05, + "loss": 0.7675, + "step": 14625 + }, + { + "epoch": 0.78, + "learning_rate": 4.206640967712691e-05, + "loss": 0.6525, + "step": 14630 + }, + { + "epoch": 0.78, + "learning_rate": 4.2061291653646996e-05, + "loss": 0.8383, + "step": 14635 + }, + { + "epoch": 0.78, + "learning_rate": 4.205617229142287e-05, + "loss": 0.791, + "step": 14640 + }, + { + "epoch": 0.78, + "learning_rate": 4.205105159085624e-05, + "loss": 0.8871, + "step": 14645 + }, + { + "epoch": 0.78, + "learning_rate": 4.2045929552348914e-05, + "loss": 0.8507, + "step": 14650 + }, + { + "epoch": 0.78, + "learning_rate": 4.2040806176302795e-05, + "loss": 0.6648, + "step": 14655 + }, + { + "epoch": 0.78, + "learning_rate": 4.203568146311989e-05, + "loss": 0.8827, + "step": 14660 + }, + { + "epoch": 0.78, + "learning_rate": 4.203055541320233e-05, + "loss": 0.8537, + "step": 14665 + }, + { + "epoch": 0.78, + "learning_rate": 4.202542802695235e-05, + "loss": 0.6253, + "step": 14670 + }, + { + "epoch": 0.79, + "learning_rate": 4.202029930477226e-05, + "loss": 0.8452, + "step": 14675 + }, + { + "epoch": 0.79, + "learning_rate": 4.2015169247064494e-05, + "loss": 0.8675, + "step": 14680 + }, + { + "epoch": 0.79, + "learning_rate": 4.20100378542316e-05, + "loss": 0.7464, + "step": 14685 + }, + { + "epoch": 0.79, + "learning_rate": 4.2004905126676225e-05, + "loss": 0.7281, + "step": 14690 + }, + { + "epoch": 0.79, + "learning_rate": 4.199977106480111e-05, + "loss": 0.7011, + "step": 14695 + }, + { + "epoch": 0.79, + "learning_rate": 4.199463566900911e-05, + "loss": 0.8669, + "step": 14700 + }, + { + "epoch": 0.79, + "learning_rate": 4.1989498939703186e-05, + "loss": 0.5759, + "step": 14705 + }, + { + "epoch": 0.79, + "learning_rate": 4.19843608772864e-05, + "loss": 0.8512, + "step": 14710 + }, + { + "epoch": 0.79, + "learning_rate": 4.197922148216191e-05, + "loss": 0.6725, + "step": 14715 + }, + { + "epoch": 0.79, + "learning_rate": 4.1974080754732994e-05, + "loss": 0.7492, + "step": 14720 + }, + { + "epoch": 0.79, + "learning_rate": 4.1968938695403026e-05, + "loss": 0.7745, + "step": 14725 + }, + { + "epoch": 0.79, + "learning_rate": 4.1963795304575497e-05, + "loss": 0.7565, + "step": 14730 + }, + { + "epoch": 0.79, + "learning_rate": 4.1958650582653986e-05, + "loss": 0.7457, + "step": 14735 + }, + { + "epoch": 0.79, + "learning_rate": 4.195350453004218e-05, + "loss": 0.7437, + "step": 14740 + }, + { + "epoch": 0.79, + "learning_rate": 4.194835714714386e-05, + "loss": 0.7795, + "step": 14745 + }, + { + "epoch": 0.79, + "learning_rate": 4.194320843436296e-05, + "loss": 0.8125, + "step": 14750 + }, + { + "epoch": 0.79, + "learning_rate": 4.193805839210344e-05, + "loss": 0.7636, + "step": 14755 + }, + { + "epoch": 0.79, + "learning_rate": 4.193290702076945e-05, + "loss": 0.7778, + "step": 14760 + }, + { + "epoch": 0.79, + "learning_rate": 4.1927754320765166e-05, + "loss": 0.8949, + "step": 14765 + }, + { + "epoch": 0.79, + "learning_rate": 4.192260029249492e-05, + "loss": 0.8191, + "step": 14770 + }, + { + "epoch": 0.79, + "learning_rate": 4.191744493636313e-05, + "loss": 0.6705, + "step": 14775 + }, + { + "epoch": 0.79, + "learning_rate": 4.1912288252774326e-05, + "loss": 0.7548, + "step": 14780 + }, + { + "epoch": 0.79, + "learning_rate": 4.190713024213312e-05, + "loss": 0.6672, + "step": 14785 + }, + { + "epoch": 0.79, + "learning_rate": 4.190197090484426e-05, + "loss": 0.8458, + "step": 14790 + }, + { + "epoch": 0.79, + "learning_rate": 4.189681024131258e-05, + "loss": 0.8437, + "step": 14795 + }, + { + "epoch": 0.79, + "learning_rate": 4.1891648251943006e-05, + "loss": 0.7822, + "step": 14800 + }, + { + "epoch": 0.79, + "learning_rate": 4.18864849371406e-05, + "loss": 0.8932, + "step": 14805 + }, + { + "epoch": 0.79, + "learning_rate": 4.18813202973105e-05, + "loss": 0.6989, + "step": 14810 + }, + { + "epoch": 0.79, + "learning_rate": 4.187615433285797e-05, + "loss": 0.8448, + "step": 14815 + }, + { + "epoch": 0.79, + "learning_rate": 4.187098704418836e-05, + "loss": 0.7325, + "step": 14820 + }, + { + "epoch": 0.79, + "learning_rate": 4.1865818431707124e-05, + "loss": 0.8346, + "step": 14825 + }, + { + "epoch": 0.79, + "learning_rate": 4.186064849581983e-05, + "loss": 0.7526, + "step": 14830 + }, + { + "epoch": 0.79, + "learning_rate": 4.185547723693215e-05, + "loss": 0.7775, + "step": 14835 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850304655449855e-05, + "loss": 0.8915, + "step": 14840 + }, + { + "epoch": 0.79, + "learning_rate": 4.1845130751778826e-05, + "loss": 0.739, + "step": 14845 + }, + { + "epoch": 0.79, + "learning_rate": 4.1839955526325026e-05, + "loss": 0.7897, + "step": 14850 + }, + { + "epoch": 0.79, + "learning_rate": 4.1834778979494556e-05, + "loss": 0.657, + "step": 14855 + }, + { + "epoch": 0.79, + "learning_rate": 4.182960111169359e-05, + "loss": 0.6768, + "step": 14860 + }, + { + "epoch": 0.8, + "learning_rate": 4.1824421923328427e-05, + "loss": 0.7218, + "step": 14865 + }, + { + "epoch": 0.8, + "learning_rate": 4.181924141480545e-05, + "loss": 0.815, + "step": 14870 + }, + { + "epoch": 0.8, + "learning_rate": 4.1814059586531174e-05, + "loss": 0.7547, + "step": 14875 + }, + { + "epoch": 0.8, + "learning_rate": 4.180887643891218e-05, + "loss": 0.7784, + "step": 14880 + }, + { + "epoch": 0.8, + "learning_rate": 4.1803691972355195e-05, + "loss": 0.6969, + "step": 14885 + }, + { + "epoch": 0.8, + "learning_rate": 4.1798506187267004e-05, + "loss": 0.8073, + "step": 14890 + }, + { + "epoch": 0.8, + "learning_rate": 4.179331908405454e-05, + "loss": 0.8073, + "step": 14895 + }, + { + "epoch": 0.8, + "learning_rate": 4.1788130663124804e-05, + "loss": 0.6123, + "step": 14900 + }, + { + "epoch": 0.8, + "learning_rate": 4.178294092488492e-05, + "loss": 0.8278, + "step": 14905 + }, + { + "epoch": 0.8, + "learning_rate": 4.177774986974211e-05, + "loss": 0.8543, + "step": 14910 + }, + { + "epoch": 0.8, + "learning_rate": 4.177255749810369e-05, + "loss": 0.8314, + "step": 14915 + }, + { + "epoch": 0.8, + "learning_rate": 4.176736381037712e-05, + "loss": 0.8078, + "step": 14920 + }, + { + "epoch": 0.8, + "learning_rate": 4.176216880696988e-05, + "loss": 0.7023, + "step": 14925 + }, + { + "epoch": 0.8, + "learning_rate": 4.1756972488289656e-05, + "loss": 0.7758, + "step": 14930 + }, + { + "epoch": 0.8, + "learning_rate": 4.175177485474415e-05, + "loss": 0.8373, + "step": 14935 + }, + { + "epoch": 0.8, + "learning_rate": 4.174657590674122e-05, + "loss": 0.8394, + "step": 14940 + }, + { + "epoch": 0.8, + "learning_rate": 4.174137564468881e-05, + "loss": 0.766, + "step": 14945 + }, + { + "epoch": 0.8, + "learning_rate": 4.173617406899496e-05, + "loss": 0.8331, + "step": 14950 + }, + { + "epoch": 0.8, + "learning_rate": 4.173097118006783e-05, + "loss": 0.8118, + "step": 14955 + }, + { + "epoch": 0.8, + "learning_rate": 4.1725766978315675e-05, + "loss": 0.7548, + "step": 14960 + }, + { + "epoch": 0.8, + "learning_rate": 4.172056146414684e-05, + "loss": 0.7587, + "step": 14965 + }, + { + "epoch": 0.8, + "learning_rate": 4.17153546379698e-05, + "loss": 0.7882, + "step": 14970 + }, + { + "epoch": 0.8, + "learning_rate": 4.1710146500193106e-05, + "loss": 0.8631, + "step": 14975 + }, + { + "epoch": 0.8, + "learning_rate": 4.170493705122543e-05, + "loss": 0.6468, + "step": 14980 + }, + { + "epoch": 0.8, + "learning_rate": 4.1699726291475524e-05, + "loss": 0.7515, + "step": 14985 + }, + { + "epoch": 0.8, + "learning_rate": 4.169451422135229e-05, + "loss": 0.9514, + "step": 14990 + }, + { + "epoch": 0.8, + "learning_rate": 4.168930084126468e-05, + "loss": 0.7948, + "step": 14995 + }, + { + "epoch": 0.8, + "learning_rate": 4.168408615162178e-05, + "loss": 0.8938, + "step": 15000 + }, + { + "epoch": 0.8, + "learning_rate": 4.167887015283276e-05, + "loss": 0.6644, + "step": 15005 + }, + { + "epoch": 0.8, + "learning_rate": 4.167365284530691e-05, + "loss": 0.784, + "step": 15010 + }, + { + "epoch": 0.8, + "learning_rate": 4.166843422945362e-05, + "loss": 0.7293, + "step": 15015 + }, + { + "epoch": 0.8, + "learning_rate": 4.166321430568236e-05, + "loss": 0.8173, + "step": 15020 + }, + { + "epoch": 0.8, + "learning_rate": 4.1657993074402745e-05, + "loss": 0.6368, + "step": 15025 + }, + { + "epoch": 0.8, + "learning_rate": 4.1652770536024445e-05, + "loss": 0.7605, + "step": 15030 + }, + { + "epoch": 0.8, + "learning_rate": 4.164754669095727e-05, + "loss": 0.8674, + "step": 15035 + }, + { + "epoch": 0.8, + "learning_rate": 4.164232153961112e-05, + "loss": 0.8138, + "step": 15040 + }, + { + "epoch": 0.8, + "learning_rate": 4.1637095082395985e-05, + "loss": 0.6695, + "step": 15045 + }, + { + "epoch": 0.81, + "learning_rate": 4.163186731972197e-05, + "loss": 0.7654, + "step": 15050 + }, + { + "epoch": 0.81, + "learning_rate": 4.162663825199929e-05, + "loss": 0.7337, + "step": 15055 + }, + { + "epoch": 0.81, + "learning_rate": 4.162140787963824e-05, + "loss": 0.7798, + "step": 15060 + }, + { + "epoch": 0.81, + "learning_rate": 4.161617620304924e-05, + "loss": 0.7638, + "step": 15065 + }, + { + "epoch": 0.81, + "learning_rate": 4.16109432226428e-05, + "loss": 0.8025, + "step": 15070 + }, + { + "epoch": 0.81, + "learning_rate": 4.1605708938829535e-05, + "loss": 0.714, + "step": 15075 + }, + { + "epoch": 0.81, + "learning_rate": 4.1600473352020166e-05, + "loss": 0.7884, + "step": 15080 + }, + { + "epoch": 0.81, + "learning_rate": 4.15952364626255e-05, + "loss": 0.7055, + "step": 15085 + }, + { + "epoch": 0.81, + "learning_rate": 4.1589998271056473e-05, + "loss": 0.7577, + "step": 15090 + }, + { + "epoch": 0.81, + "learning_rate": 4.15847587777241e-05, + "loss": 0.6966, + "step": 15095 + }, + { + "epoch": 0.81, + "learning_rate": 4.1579517983039514e-05, + "loss": 0.7704, + "step": 15100 + }, + { + "epoch": 0.81, + "learning_rate": 4.157427588741394e-05, + "loss": 0.8016, + "step": 15105 + }, + { + "epoch": 0.81, + "learning_rate": 4.1569032491258695e-05, + "loss": 0.6717, + "step": 15110 + }, + { + "epoch": 0.81, + "learning_rate": 4.156378779498524e-05, + "loss": 0.804, + "step": 15115 + }, + { + "epoch": 0.81, + "learning_rate": 4.155854179900508e-05, + "loss": 0.7728, + "step": 15120 + }, + { + "epoch": 0.81, + "learning_rate": 4.1553294503729875e-05, + "loss": 0.745, + "step": 15125 + }, + { + "epoch": 0.81, + "learning_rate": 4.1548045909571354e-05, + "loss": 0.8904, + "step": 15130 + }, + { + "epoch": 0.81, + "learning_rate": 4.1542796016941344e-05, + "loss": 0.6619, + "step": 15135 + }, + { + "epoch": 0.81, + "learning_rate": 4.153754482625181e-05, + "loss": 0.6701, + "step": 15140 + }, + { + "epoch": 0.81, + "learning_rate": 4.1532292337914775e-05, + "loss": 0.6635, + "step": 15145 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527038552342394e-05, + "loss": 0.6441, + "step": 15150 + }, + { + "epoch": 0.81, + "learning_rate": 4.152178346994692e-05, + "loss": 0.6902, + "step": 15155 + }, + { + "epoch": 0.81, + "learning_rate": 4.15165270911407e-05, + "loss": 0.8804, + "step": 15160 + }, + { + "epoch": 0.81, + "learning_rate": 4.151126941633619e-05, + "loss": 0.9651, + "step": 15165 + }, + { + "epoch": 0.81, + "learning_rate": 4.150601044594591e-05, + "loss": 0.8558, + "step": 15170 + }, + { + "epoch": 0.81, + "learning_rate": 4.1500750180382555e-05, + "loss": 0.8583, + "step": 15175 + }, + { + "epoch": 0.81, + "learning_rate": 4.1495488620058865e-05, + "loss": 0.9248, + "step": 15180 + }, + { + "epoch": 0.81, + "learning_rate": 4.149022576538769e-05, + "loss": 0.6452, + "step": 15185 + }, + { + "epoch": 0.81, + "learning_rate": 4.1484961616782016e-05, + "loss": 0.746, + "step": 15190 + }, + { + "epoch": 0.81, + "learning_rate": 4.147969617465487e-05, + "loss": 0.7649, + "step": 15195 + }, + { + "epoch": 0.81, + "learning_rate": 4.1474429439419426e-05, + "loss": 0.8337, + "step": 15200 + }, + { + "epoch": 0.81, + "learning_rate": 4.146916141148896e-05, + "loss": 0.8712, + "step": 15205 + }, + { + "epoch": 0.81, + "learning_rate": 4.146389209127682e-05, + "loss": 0.6799, + "step": 15210 + }, + { + "epoch": 0.81, + "learning_rate": 4.145862147919648e-05, + "loss": 0.6875, + "step": 15215 + }, + { + "epoch": 0.81, + "learning_rate": 4.145334957566151e-05, + "loss": 0.8359, + "step": 15220 + }, + { + "epoch": 0.81, + "learning_rate": 4.144807638108558e-05, + "loss": 0.6971, + "step": 15225 + }, + { + "epoch": 0.81, + "learning_rate": 4.1442801895882454e-05, + "loss": 0.8152, + "step": 15230 + }, + { + "epoch": 0.82, + "learning_rate": 4.143752612046601e-05, + "loss": 0.7915, + "step": 15235 + }, + { + "epoch": 0.82, + "learning_rate": 4.143224905525021e-05, + "loss": 0.8422, + "step": 15240 + }, + { + "epoch": 0.82, + "learning_rate": 4.1426970700649147e-05, + "loss": 0.6184, + "step": 15245 + }, + { + "epoch": 0.82, + "learning_rate": 4.1421691057076975e-05, + "loss": 0.7222, + "step": 15250 + }, + { + "epoch": 0.82, + "learning_rate": 4.141641012494799e-05, + "loss": 0.8185, + "step": 15255 + }, + { + "epoch": 0.82, + "learning_rate": 4.1411127904676556e-05, + "loss": 0.7646, + "step": 15260 + }, + { + "epoch": 0.82, + "learning_rate": 4.1405844396677153e-05, + "loss": 0.856, + "step": 15265 + }, + { + "epoch": 0.82, + "learning_rate": 4.140055960136437e-05, + "loss": 0.774, + "step": 15270 + }, + { + "epoch": 0.82, + "learning_rate": 4.139527351915288e-05, + "loss": 0.8261, + "step": 15275 + }, + { + "epoch": 0.82, + "learning_rate": 4.138998615045747e-05, + "loss": 0.6593, + "step": 15280 + }, + { + "epoch": 0.82, + "learning_rate": 4.1384697495693014e-05, + "loss": 0.7459, + "step": 15285 + }, + { + "epoch": 0.82, + "learning_rate": 4.1379407555274507e-05, + "loss": 0.739, + "step": 15290 + }, + { + "epoch": 0.82, + "learning_rate": 4.137411632961702e-05, + "loss": 0.7252, + "step": 15295 + }, + { + "epoch": 0.82, + "learning_rate": 4.136882381913575e-05, + "loss": 0.7608, + "step": 15300 + }, + { + "epoch": 0.82, + "learning_rate": 4.1363530024245986e-05, + "loss": 0.8032, + "step": 15305 + }, + { + "epoch": 0.82, + "learning_rate": 4.13582349453631e-05, + "loss": 0.8075, + "step": 15310 + }, + { + "epoch": 0.82, + "learning_rate": 4.135293858290258e-05, + "loss": 0.8385, + "step": 15315 + }, + { + "epoch": 0.82, + "learning_rate": 4.134764093728003e-05, + "loss": 0.9119, + "step": 15320 + }, + { + "epoch": 0.82, + "learning_rate": 4.1342342008911126e-05, + "loss": 0.8657, + "step": 15325 + }, + { + "epoch": 0.82, + "learning_rate": 4.1337041798211675e-05, + "loss": 0.7458, + "step": 15330 + }, + { + "epoch": 0.82, + "learning_rate": 4.1331740305597546e-05, + "loss": 0.8361, + "step": 15335 + }, + { + "epoch": 0.82, + "learning_rate": 4.1326437531484734e-05, + "loss": 0.9129, + "step": 15340 + }, + { + "epoch": 0.82, + "learning_rate": 4.132113347628934e-05, + "loss": 0.8046, + "step": 15345 + }, + { + "epoch": 0.82, + "learning_rate": 4.131582814042755e-05, + "loss": 0.7866, + "step": 15350 + }, + { + "epoch": 0.82, + "learning_rate": 4.131052152431566e-05, + "loss": 0.6996, + "step": 15355 + }, + { + "epoch": 0.82, + "learning_rate": 4.1305213628370065e-05, + "loss": 0.7152, + "step": 15360 + }, + { + "epoch": 0.82, + "learning_rate": 4.1299904453007245e-05, + "loss": 0.7719, + "step": 15365 + }, + { + "epoch": 0.82, + "learning_rate": 4.1294593998643805e-05, + "loss": 0.7073, + "step": 15370 + }, + { + "epoch": 0.82, + "learning_rate": 4.1289282265696436e-05, + "loss": 0.635, + "step": 15375 + }, + { + "epoch": 0.82, + "learning_rate": 4.128396925458194e-05, + "loss": 0.7967, + "step": 15380 + }, + { + "epoch": 0.82, + "learning_rate": 4.12786549657172e-05, + "loss": 0.8185, + "step": 15385 + }, + { + "epoch": 0.82, + "learning_rate": 4.127333939951922e-05, + "loss": 0.7898, + "step": 15390 + }, + { + "epoch": 0.82, + "learning_rate": 4.1268022556405086e-05, + "loss": 0.8471, + "step": 15395 + }, + { + "epoch": 0.82, + "learning_rate": 4.1262704436792006e-05, + "loss": 0.7575, + "step": 15400 + }, + { + "epoch": 0.82, + "learning_rate": 4.125738504109726e-05, + "loss": 0.7044, + "step": 15405 + }, + { + "epoch": 0.82, + "learning_rate": 4.1252064369738256e-05, + "loss": 0.7422, + "step": 15410 + }, + { + "epoch": 0.82, + "learning_rate": 4.124674242313249e-05, + "loss": 0.8324, + "step": 15415 + }, + { + "epoch": 0.82, + "learning_rate": 4.124141920169755e-05, + "loss": 0.7179, + "step": 15420 + }, + { + "epoch": 0.83, + "learning_rate": 4.1236094705851136e-05, + "loss": 0.986, + "step": 15425 + }, + { + "epoch": 0.83, + "learning_rate": 4.1230768936011045e-05, + "loss": 0.6335, + "step": 15430 + }, + { + "epoch": 0.83, + "learning_rate": 4.122544189259517e-05, + "loss": 0.8705, + "step": 15435 + }, + { + "epoch": 0.83, + "learning_rate": 4.122011357602151e-05, + "loss": 0.7458, + "step": 15440 + }, + { + "epoch": 0.83, + "learning_rate": 4.1214783986708156e-05, + "loss": 0.7942, + "step": 15445 + }, + { + "epoch": 0.83, + "learning_rate": 4.12094531250733e-05, + "loss": 0.7531, + "step": 15450 + }, + { + "epoch": 0.83, + "learning_rate": 4.120412099153525e-05, + "loss": 0.6637, + "step": 15455 + }, + { + "epoch": 0.83, + "learning_rate": 4.119878758651241e-05, + "loss": 0.8635, + "step": 15460 + }, + { + "epoch": 0.83, + "learning_rate": 4.1193452910423246e-05, + "loss": 0.8071, + "step": 15465 + }, + { + "epoch": 0.83, + "learning_rate": 4.118811696368637e-05, + "loss": 0.6934, + "step": 15470 + }, + { + "epoch": 0.83, + "learning_rate": 4.118277974672047e-05, + "loss": 0.7153, + "step": 15475 + }, + { + "epoch": 0.83, + "learning_rate": 4.117744125994435e-05, + "loss": 0.7298, + "step": 15480 + }, + { + "epoch": 0.83, + "learning_rate": 4.117210150377689e-05, + "loss": 0.7669, + "step": 15485 + }, + { + "epoch": 0.83, + "learning_rate": 4.116676047863709e-05, + "loss": 0.6456, + "step": 15490 + }, + { + "epoch": 0.83, + "learning_rate": 4.116141818494406e-05, + "loss": 0.9272, + "step": 15495 + }, + { + "epoch": 0.83, + "learning_rate": 4.115607462311696e-05, + "loss": 0.7576, + "step": 15500 + }, + { + "epoch": 0.83, + "learning_rate": 4.1150729793575104e-05, + "loss": 0.7664, + "step": 15505 + }, + { + "epoch": 0.83, + "learning_rate": 4.114538369673787e-05, + "loss": 0.7221, + "step": 15510 + }, + { + "epoch": 0.83, + "learning_rate": 4.114003633302476e-05, + "loss": 0.6396, + "step": 15515 + }, + { + "epoch": 0.83, + "learning_rate": 4.1134687702855365e-05, + "loss": 0.766, + "step": 15520 + }, + { + "epoch": 0.83, + "learning_rate": 4.1129337806649365e-05, + "loss": 0.8143, + "step": 15525 + }, + { + "epoch": 0.83, + "learning_rate": 4.112398664482656e-05, + "loss": 0.7454, + "step": 15530 + }, + { + "epoch": 0.83, + "learning_rate": 4.111863421780683e-05, + "loss": 0.7416, + "step": 15535 + }, + { + "epoch": 0.83, + "learning_rate": 4.111328052601017e-05, + "loss": 0.6742, + "step": 15540 + }, + { + "epoch": 0.83, + "learning_rate": 4.110792556985666e-05, + "loss": 0.8823, + "step": 15545 + }, + { + "epoch": 0.83, + "learning_rate": 4.110256934976647e-05, + "loss": 0.8482, + "step": 15550 + }, + { + "epoch": 0.83, + "learning_rate": 4.109721186615992e-05, + "loss": 0.692, + "step": 15555 + }, + { + "epoch": 0.83, + "learning_rate": 4.109185311945738e-05, + "loss": 0.7132, + "step": 15560 + }, + { + "epoch": 0.83, + "learning_rate": 4.1086493110079326e-05, + "loss": 0.8627, + "step": 15565 + }, + { + "epoch": 0.83, + "learning_rate": 4.108113183844634e-05, + "loss": 0.8395, + "step": 15570 + }, + { + "epoch": 0.83, + "learning_rate": 4.107576930497912e-05, + "loss": 0.8132, + "step": 15575 + }, + { + "epoch": 0.83, + "learning_rate": 4.107040551009843e-05, + "loss": 0.8542, + "step": 15580 + }, + { + "epoch": 0.83, + "learning_rate": 4.106504045422515e-05, + "loss": 0.7814, + "step": 15585 + }, + { + "epoch": 0.83, + "learning_rate": 4.1059674137780275e-05, + "loss": 0.8744, + "step": 15590 + }, + { + "epoch": 0.83, + "learning_rate": 4.105430656118486e-05, + "loss": 0.7, + "step": 15595 + }, + { + "epoch": 0.83, + "learning_rate": 4.104893772486011e-05, + "loss": 0.6878, + "step": 15600 + }, + { + "epoch": 0.83, + "learning_rate": 4.1043567629227265e-05, + "loss": 0.807, + "step": 15605 + }, + { + "epoch": 0.84, + "learning_rate": 4.103819627470772e-05, + "loss": 0.723, + "step": 15610 + }, + { + "epoch": 0.84, + "learning_rate": 4.103282366172295e-05, + "loss": 0.6832, + "step": 15615 + }, + { + "epoch": 0.84, + "learning_rate": 4.102744979069452e-05, + "loss": 0.8209, + "step": 15620 + }, + { + "epoch": 0.84, + "learning_rate": 4.10220746620441e-05, + "loss": 0.8223, + "step": 15625 + }, + { + "epoch": 0.84, + "learning_rate": 4.101669827619346e-05, + "loss": 0.8338, + "step": 15630 + }, + { + "epoch": 0.84, + "learning_rate": 4.101132063356447e-05, + "loss": 0.7741, + "step": 15635 + }, + { + "epoch": 0.84, + "learning_rate": 4.100594173457909e-05, + "loss": 0.8217, + "step": 15640 + }, + { + "epoch": 0.84, + "learning_rate": 4.1000561579659396e-05, + "loss": 0.7858, + "step": 15645 + }, + { + "epoch": 0.84, + "learning_rate": 4.0995180169227536e-05, + "loss": 0.7079, + "step": 15650 + }, + { + "epoch": 0.84, + "learning_rate": 4.0989797503705784e-05, + "loss": 0.8236, + "step": 15655 + }, + { + "epoch": 0.84, + "learning_rate": 4.098441358351649e-05, + "loss": 0.7493, + "step": 15660 + }, + { + "epoch": 0.84, + "learning_rate": 4.097902840908213e-05, + "loss": 0.6868, + "step": 15665 + }, + { + "epoch": 0.84, + "learning_rate": 4.097364198082524e-05, + "loss": 0.8257, + "step": 15670 + }, + { + "epoch": 0.84, + "learning_rate": 4.096825429916849e-05, + "loss": 0.8279, + "step": 15675 + }, + { + "epoch": 0.84, + "learning_rate": 4.096286536453463e-05, + "loss": 0.7511, + "step": 15680 + }, + { + "epoch": 0.84, + "learning_rate": 4.095747517734651e-05, + "loss": 0.8088, + "step": 15685 + }, + { + "epoch": 0.84, + "learning_rate": 4.095208373802708e-05, + "loss": 0.8327, + "step": 15690 + }, + { + "epoch": 0.84, + "learning_rate": 4.09466910469994e-05, + "loss": 0.708, + "step": 15695 + }, + { + "epoch": 0.84, + "learning_rate": 4.0941297104686597e-05, + "loss": 0.9236, + "step": 15700 + }, + { + "epoch": 0.84, + "learning_rate": 4.093590191151193e-05, + "loss": 0.9189, + "step": 15705 + }, + { + "epoch": 0.84, + "learning_rate": 4.093050546789874e-05, + "loss": 0.6486, + "step": 15710 + }, + { + "epoch": 0.84, + "learning_rate": 4.092510777427048e-05, + "loss": 0.7899, + "step": 15715 + }, + { + "epoch": 0.84, + "learning_rate": 4.091970883105066e-05, + "loss": 0.6896, + "step": 15720 + }, + { + "epoch": 0.84, + "learning_rate": 4.0914308638662935e-05, + "loss": 0.8377, + "step": 15725 + }, + { + "epoch": 0.84, + "learning_rate": 4.0908907197531054e-05, + "loss": 0.7677, + "step": 15730 + }, + { + "epoch": 0.84, + "learning_rate": 4.0903504508078825e-05, + "loss": 0.8673, + "step": 15735 + }, + { + "epoch": 0.84, + "learning_rate": 4.08981005707302e-05, + "loss": 0.7684, + "step": 15740 + }, + { + "epoch": 0.84, + "learning_rate": 4.0892695385909195e-05, + "loss": 0.743, + "step": 15745 + }, + { + "epoch": 0.84, + "learning_rate": 4.0887288954039945e-05, + "loss": 0.6505, + "step": 15750 + }, + { + "epoch": 0.84, + "learning_rate": 4.088188127554666e-05, + "loss": 0.7623, + "step": 15755 + }, + { + "epoch": 0.84, + "learning_rate": 4.0876472350853695e-05, + "loss": 0.8144, + "step": 15760 + }, + { + "epoch": 0.84, + "learning_rate": 4.087106218038544e-05, + "loss": 0.8852, + "step": 15765 + }, + { + "epoch": 0.84, + "learning_rate": 4.086565076456643e-05, + "loss": 0.7938, + "step": 15770 + }, + { + "epoch": 0.84, + "learning_rate": 4.086023810382127e-05, + "loss": 0.8009, + "step": 15775 + }, + { + "epoch": 0.84, + "learning_rate": 4.0854824198574684e-05, + "loss": 0.6205, + "step": 15780 + }, + { + "epoch": 0.84, + "learning_rate": 4.0849409049251476e-05, + "loss": 0.6615, + "step": 15785 + }, + { + "epoch": 0.84, + "learning_rate": 4.084399265627656e-05, + "loss": 0.9059, + "step": 15790 + }, + { + "epoch": 0.84, + "learning_rate": 4.083857502007494e-05, + "loss": 0.7041, + "step": 15795 + }, + { + "epoch": 0.85, + "learning_rate": 4.0833156141071725e-05, + "loss": 0.7699, + "step": 15800 + }, + { + "epoch": 0.85, + "learning_rate": 4.082773601969212e-05, + "loss": 0.7804, + "step": 15805 + }, + { + "epoch": 0.85, + "learning_rate": 4.08223146563614e-05, + "loss": 0.8664, + "step": 15810 + }, + { + "epoch": 0.85, + "learning_rate": 4.0816892051504994e-05, + "loss": 0.797, + "step": 15815 + }, + { + "epoch": 0.85, + "learning_rate": 4.081146820554839e-05, + "loss": 0.9085, + "step": 15820 + }, + { + "epoch": 0.85, + "learning_rate": 4.080604311891716e-05, + "loss": 0.6532, + "step": 15825 + }, + { + "epoch": 0.85, + "learning_rate": 4.0800616792037e-05, + "loss": 0.6988, + "step": 15830 + }, + { + "epoch": 0.85, + "learning_rate": 4.079518922533371e-05, + "loss": 0.9728, + "step": 15835 + }, + { + "epoch": 0.85, + "learning_rate": 4.078976041923316e-05, + "loss": 0.8549, + "step": 15840 + }, + { + "epoch": 0.85, + "learning_rate": 4.078433037416133e-05, + "loss": 0.9004, + "step": 15845 + }, + { + "epoch": 0.85, + "learning_rate": 4.0778899090544306e-05, + "loss": 0.7381, + "step": 15850 + }, + { + "epoch": 0.85, + "learning_rate": 4.0773466568808264e-05, + "loss": 0.7542, + "step": 15855 + }, + { + "epoch": 0.85, + "learning_rate": 4.0768032809379474e-05, + "loss": 0.6511, + "step": 15860 + }, + { + "epoch": 0.85, + "learning_rate": 4.07625978126843e-05, + "loss": 0.8358, + "step": 15865 + }, + { + "epoch": 0.85, + "learning_rate": 4.075716157914922e-05, + "loss": 0.8453, + "step": 15870 + }, + { + "epoch": 0.85, + "learning_rate": 4.0751724109200786e-05, + "loss": 0.6953, + "step": 15875 + }, + { + "epoch": 0.85, + "learning_rate": 4.074628540326566e-05, + "loss": 0.8037, + "step": 15880 + }, + { + "epoch": 0.85, + "learning_rate": 4.074084546177061e-05, + "loss": 0.6102, + "step": 15885 + }, + { + "epoch": 0.85, + "learning_rate": 4.073540428514247e-05, + "loss": 0.7115, + "step": 15890 + }, + { + "epoch": 0.85, + "learning_rate": 4.0729961873808206e-05, + "loss": 0.7593, + "step": 15895 + }, + { + "epoch": 0.85, + "learning_rate": 4.072451822819487e-05, + "loss": 0.7675, + "step": 15900 + }, + { + "epoch": 0.85, + "learning_rate": 4.071907334872961e-05, + "loss": 0.7334, + "step": 15905 + }, + { + "epoch": 0.85, + "learning_rate": 4.071362723583966e-05, + "loss": 0.7047, + "step": 15910 + }, + { + "epoch": 0.85, + "learning_rate": 4.0708179889952344e-05, + "loss": 0.7953, + "step": 15915 + }, + { + "epoch": 0.85, + "learning_rate": 4.0702731311495124e-05, + "loss": 0.7965, + "step": 15920 + }, + { + "epoch": 0.85, + "learning_rate": 4.069728150089552e-05, + "loss": 0.7737, + "step": 15925 + }, + { + "epoch": 0.85, + "learning_rate": 4.069183045858116e-05, + "loss": 0.6713, + "step": 15930 + }, + { + "epoch": 0.85, + "learning_rate": 4.0686378184979775e-05, + "loss": 0.7556, + "step": 15935 + }, + { + "epoch": 0.85, + "learning_rate": 4.068092468051918e-05, + "loss": 0.8424, + "step": 15940 + }, + { + "epoch": 0.85, + "learning_rate": 4.06754699456273e-05, + "loss": 0.6981, + "step": 15945 + }, + { + "epoch": 0.85, + "learning_rate": 4.067001398073214e-05, + "loss": 0.8353, + "step": 15950 + }, + { + "epoch": 0.85, + "learning_rate": 4.066455678626183e-05, + "loss": 0.8526, + "step": 15955 + }, + { + "epoch": 0.85, + "learning_rate": 4.0659098362644566e-05, + "loss": 0.8427, + "step": 15960 + }, + { + "epoch": 0.85, + "learning_rate": 4.0653638710308654e-05, + "loss": 0.8564, + "step": 15965 + }, + { + "epoch": 0.85, + "learning_rate": 4.0648177829682486e-05, + "loss": 0.6733, + "step": 15970 + }, + { + "epoch": 0.85, + "learning_rate": 4.064271572119458e-05, + "loss": 0.7721, + "step": 15975 + }, + { + "epoch": 0.85, + "learning_rate": 4.063725238527352e-05, + "loss": 0.7879, + "step": 15980 + }, + { + "epoch": 0.86, + "learning_rate": 4.0631787822347985e-05, + "loss": 0.8468, + "step": 15985 + }, + { + "epoch": 0.86, + "learning_rate": 4.0626322032846794e-05, + "loss": 0.6828, + "step": 15990 + }, + { + "epoch": 0.86, + "learning_rate": 4.062085501719879e-05, + "loss": 0.6688, + "step": 15995 + }, + { + "epoch": 0.86, + "learning_rate": 4.0615386775832976e-05, + "loss": 0.8081, + "step": 16000 + }, + { + "epoch": 0.86, + "learning_rate": 4.060991730917842e-05, + "loss": 0.7331, + "step": 16005 + }, + { + "epoch": 0.86, + "learning_rate": 4.060444661766429e-05, + "loss": 0.9095, + "step": 16010 + }, + { + "epoch": 0.86, + "learning_rate": 4.059897470171987e-05, + "loss": 0.7211, + "step": 16015 + }, + { + "epoch": 0.86, + "learning_rate": 4.0593501561774503e-05, + "loss": 0.9216, + "step": 16020 + }, + { + "epoch": 0.86, + "learning_rate": 4.058802719825766e-05, + "loss": 0.7649, + "step": 16025 + }, + { + "epoch": 0.86, + "learning_rate": 4.058255161159889e-05, + "loss": 0.9022, + "step": 16030 + }, + { + "epoch": 0.86, + "learning_rate": 4.057707480222785e-05, + "loss": 0.6374, + "step": 16035 + }, + { + "epoch": 0.86, + "learning_rate": 4.0571596770574284e-05, + "loss": 0.7034, + "step": 16040 + }, + { + "epoch": 0.86, + "learning_rate": 4.056611751706804e-05, + "loss": 0.7342, + "step": 16045 + }, + { + "epoch": 0.86, + "learning_rate": 4.0560637042139056e-05, + "loss": 0.9441, + "step": 16050 + }, + { + "epoch": 0.86, + "learning_rate": 4.055515534621736e-05, + "loss": 0.7188, + "step": 16055 + }, + { + "epoch": 0.86, + "learning_rate": 4.0549672429733085e-05, + "loss": 0.7402, + "step": 16060 + }, + { + "epoch": 0.86, + "learning_rate": 4.054418829311647e-05, + "loss": 0.7769, + "step": 16065 + }, + { + "epoch": 0.86, + "learning_rate": 4.0538702936797824e-05, + "loss": 0.6366, + "step": 16070 + }, + { + "epoch": 0.86, + "learning_rate": 4.053321636120757e-05, + "loss": 0.7457, + "step": 16075 + }, + { + "epoch": 0.86, + "learning_rate": 4.0527728566776225e-05, + "loss": 0.7776, + "step": 16080 + }, + { + "epoch": 0.86, + "learning_rate": 4.052223955393439e-05, + "loss": 0.71, + "step": 16085 + }, + { + "epoch": 0.86, + "learning_rate": 4.051674932311277e-05, + "loss": 0.6044, + "step": 16090 + }, + { + "epoch": 0.86, + "learning_rate": 4.0511257874742175e-05, + "loss": 0.7029, + "step": 16095 + }, + { + "epoch": 0.86, + "learning_rate": 4.05057652092535e-05, + "loss": 0.8178, + "step": 16100 + }, + { + "epoch": 0.86, + "learning_rate": 4.050027132707773e-05, + "loss": 0.7541, + "step": 16105 + }, + { + "epoch": 0.86, + "learning_rate": 4.049477622864595e-05, + "loss": 0.7106, + "step": 16110 + }, + { + "epoch": 0.86, + "learning_rate": 4.0489279914389354e-05, + "loss": 0.7993, + "step": 16115 + }, + { + "epoch": 0.86, + "learning_rate": 4.048378238473921e-05, + "loss": 0.6655, + "step": 16120 + }, + { + "epoch": 0.86, + "learning_rate": 4.0478283640126897e-05, + "loss": 0.6923, + "step": 16125 + }, + { + "epoch": 0.86, + "learning_rate": 4.0472783680983885e-05, + "loss": 0.7375, + "step": 16130 + }, + { + "epoch": 0.86, + "learning_rate": 4.0467282507741725e-05, + "loss": 0.7332, + "step": 16135 + }, + { + "epoch": 0.86, + "learning_rate": 4.0461780120832094e-05, + "loss": 0.8744, + "step": 16140 + }, + { + "epoch": 0.86, + "learning_rate": 4.045627652068673e-05, + "loss": 0.8498, + "step": 16145 + }, + { + "epoch": 0.86, + "learning_rate": 4.04507717077375e-05, + "loss": 0.8059, + "step": 16150 + }, + { + "epoch": 0.86, + "learning_rate": 4.044526568241633e-05, + "loss": 0.8051, + "step": 16155 + }, + { + "epoch": 0.86, + "learning_rate": 4.043975844515528e-05, + "loss": 0.7595, + "step": 16160 + }, + { + "epoch": 0.86, + "learning_rate": 4.043424999638647e-05, + "loss": 0.8861, + "step": 16165 + }, + { + "epoch": 0.87, + "learning_rate": 4.042874033654214e-05, + "loss": 0.655, + "step": 16170 + }, + { + "epoch": 0.87, + "learning_rate": 4.04232294660546e-05, + "loss": 0.7882, + "step": 16175 + }, + { + "epoch": 0.87, + "learning_rate": 4.041771738535628e-05, + "loss": 0.6727, + "step": 16180 + }, + { + "epoch": 0.87, + "learning_rate": 4.04122040948797e-05, + "loss": 0.8422, + "step": 16185 + }, + { + "epoch": 0.87, + "learning_rate": 4.040668959505747e-05, + "loss": 0.7551, + "step": 16190 + }, + { + "epoch": 0.87, + "learning_rate": 4.040117388632228e-05, + "loss": 0.8242, + "step": 16195 + }, + { + "epoch": 0.87, + "learning_rate": 4.0395656969106946e-05, + "loss": 0.7644, + "step": 16200 + }, + { + "epoch": 0.87, + "learning_rate": 4.039013884384435e-05, + "loss": 0.6685, + "step": 16205 + }, + { + "epoch": 0.87, + "learning_rate": 4.0384619510967494e-05, + "loss": 0.7384, + "step": 16210 + }, + { + "epoch": 0.87, + "learning_rate": 4.037909897090946e-05, + "loss": 0.7545, + "step": 16215 + }, + { + "epoch": 0.87, + "learning_rate": 4.037357722410341e-05, + "loss": 0.8711, + "step": 16220 + }, + { + "epoch": 0.87, + "learning_rate": 4.0368054270982636e-05, + "loss": 0.7537, + "step": 16225 + }, + { + "epoch": 0.87, + "learning_rate": 4.03625301119805e-05, + "loss": 0.7159, + "step": 16230 + }, + { + "epoch": 0.87, + "learning_rate": 4.035700474753047e-05, + "loss": 0.8496, + "step": 16235 + }, + { + "epoch": 0.87, + "learning_rate": 4.035147817806609e-05, + "loss": 0.7534, + "step": 16240 + }, + { + "epoch": 0.87, + "learning_rate": 4.034595040402104e-05, + "loss": 0.7851, + "step": 16245 + }, + { + "epoch": 0.87, + "learning_rate": 4.034042142582903e-05, + "loss": 0.7378, + "step": 16250 + }, + { + "epoch": 0.87, + "learning_rate": 4.033489124392392e-05, + "loss": 0.8197, + "step": 16255 + }, + { + "epoch": 0.87, + "learning_rate": 4.032935985873965e-05, + "loss": 0.8229, + "step": 16260 + }, + { + "epoch": 0.87, + "learning_rate": 4.0323827270710235e-05, + "loss": 0.8895, + "step": 16265 + }, + { + "epoch": 0.87, + "learning_rate": 4.0318293480269806e-05, + "loss": 0.7482, + "step": 16270 + }, + { + "epoch": 0.87, + "learning_rate": 4.03127584878526e-05, + "loss": 0.9005, + "step": 16275 + }, + { + "epoch": 0.87, + "learning_rate": 4.03072222938929e-05, + "loss": 0.7047, + "step": 16280 + }, + { + "epoch": 0.87, + "learning_rate": 4.030168489882512e-05, + "loss": 0.8361, + "step": 16285 + }, + { + "epoch": 0.87, + "learning_rate": 4.029614630308378e-05, + "loss": 0.8218, + "step": 16290 + }, + { + "epoch": 0.87, + "learning_rate": 4.029060650710346e-05, + "loss": 0.6176, + "step": 16295 + }, + { + "epoch": 0.87, + "learning_rate": 4.0285065511318854e-05, + "loss": 0.7183, + "step": 16300 + }, + { + "epoch": 0.87, + "learning_rate": 4.027952331616475e-05, + "loss": 0.8603, + "step": 16305 + }, + { + "epoch": 0.87, + "learning_rate": 4.027397992207601e-05, + "loss": 0.7843, + "step": 16310 + }, + { + "epoch": 0.87, + "learning_rate": 4.026843532948762e-05, + "loss": 0.6523, + "step": 16315 + }, + { + "epoch": 0.87, + "learning_rate": 4.026288953883465e-05, + "loss": 0.7879, + "step": 16320 + }, + { + "epoch": 0.87, + "learning_rate": 4.025734255055225e-05, + "loss": 0.9354, + "step": 16325 + }, + { + "epoch": 0.87, + "learning_rate": 4.0251794365075676e-05, + "loss": 0.6322, + "step": 16330 + }, + { + "epoch": 0.87, + "learning_rate": 4.024624498284029e-05, + "loss": 0.9286, + "step": 16335 + }, + { + "epoch": 0.87, + "learning_rate": 4.024069440428151e-05, + "loss": 0.9716, + "step": 16340 + }, + { + "epoch": 0.87, + "learning_rate": 4.023514262983489e-05, + "loss": 0.9005, + "step": 16345 + }, + { + "epoch": 0.87, + "learning_rate": 4.022958965993605e-05, + "loss": 0.7018, + "step": 16350 + }, + { + "epoch": 0.87, + "learning_rate": 4.022403549502072e-05, + "loss": 0.8839, + "step": 16355 + }, + { + "epoch": 0.88, + "learning_rate": 4.021848013552471e-05, + "loss": 0.7201, + "step": 16360 + }, + { + "epoch": 0.88, + "learning_rate": 4.0212923581883946e-05, + "loss": 0.7888, + "step": 16365 + }, + { + "epoch": 0.88, + "learning_rate": 4.020736583453441e-05, + "loss": 0.8112, + "step": 16370 + }, + { + "epoch": 0.88, + "learning_rate": 4.020180689391222e-05, + "loss": 0.7379, + "step": 16375 + }, + { + "epoch": 0.88, + "learning_rate": 4.0196246760453555e-05, + "loss": 0.8324, + "step": 16380 + }, + { + "epoch": 0.88, + "learning_rate": 4.019068543459471e-05, + "loss": 0.7707, + "step": 16385 + }, + { + "epoch": 0.88, + "learning_rate": 4.0185122916772066e-05, + "loss": 0.9081, + "step": 16390 + }, + { + "epoch": 0.88, + "learning_rate": 4.017955920742208e-05, + "loss": 0.789, + "step": 16395 + }, + { + "epoch": 0.88, + "learning_rate": 4.017399430698133e-05, + "loss": 0.7814, + "step": 16400 + }, + { + "epoch": 0.88, + "learning_rate": 4.016842821588648e-05, + "loss": 0.8245, + "step": 16405 + }, + { + "epoch": 0.88, + "learning_rate": 4.0162860934574275e-05, + "loss": 0.7647, + "step": 16410 + }, + { + "epoch": 0.88, + "learning_rate": 4.015729246348157e-05, + "loss": 0.8532, + "step": 16415 + }, + { + "epoch": 0.88, + "learning_rate": 4.01517228030453e-05, + "loss": 0.7159, + "step": 16420 + }, + { + "epoch": 0.88, + "learning_rate": 4.014615195370248e-05, + "loss": 0.7297, + "step": 16425 + }, + { + "epoch": 0.88, + "learning_rate": 4.0140579915890264e-05, + "loss": 0.74, + "step": 16430 + }, + { + "epoch": 0.88, + "learning_rate": 4.0135006690045864e-05, + "loss": 0.763, + "step": 16435 + }, + { + "epoch": 0.88, + "learning_rate": 4.012943227660659e-05, + "loss": 0.7939, + "step": 16440 + }, + { + "epoch": 0.88, + "learning_rate": 4.012385667600985e-05, + "loss": 0.8639, + "step": 16445 + }, + { + "epoch": 0.88, + "learning_rate": 4.011827988869313e-05, + "loss": 0.8524, + "step": 16450 + }, + { + "epoch": 0.88, + "learning_rate": 4.011270191509404e-05, + "loss": 0.6646, + "step": 16455 + }, + { + "epoch": 0.88, + "learning_rate": 4.0107122755650264e-05, + "loss": 0.6719, + "step": 16460 + }, + { + "epoch": 0.88, + "learning_rate": 4.010154241079957e-05, + "loss": 0.7728, + "step": 16465 + }, + { + "epoch": 0.88, + "learning_rate": 4.009596088097985e-05, + "loss": 0.6417, + "step": 16470 + }, + { + "epoch": 0.88, + "learning_rate": 4.009037816662904e-05, + "loss": 0.5898, + "step": 16475 + }, + { + "epoch": 0.88, + "learning_rate": 4.008479426818521e-05, + "loss": 0.9297, + "step": 16480 + }, + { + "epoch": 0.88, + "learning_rate": 4.007920918608652e-05, + "loss": 0.7191, + "step": 16485 + }, + { + "epoch": 0.88, + "learning_rate": 4.007362292077119e-05, + "loss": 0.8802, + "step": 16490 + }, + { + "epoch": 0.88, + "learning_rate": 4.006803547267759e-05, + "loss": 0.7134, + "step": 16495 + }, + { + "epoch": 0.88, + "learning_rate": 4.006244684224412e-05, + "loss": 0.8336, + "step": 16500 + }, + { + "epoch": 0.88, + "learning_rate": 4.005685702990932e-05, + "loss": 0.6985, + "step": 16505 + }, + { + "epoch": 0.88, + "learning_rate": 4.005126603611179e-05, + "loss": 0.7269, + "step": 16510 + }, + { + "epoch": 0.88, + "learning_rate": 4.004567386129025e-05, + "loss": 0.7239, + "step": 16515 + }, + { + "epoch": 0.88, + "learning_rate": 4.0040080505883484e-05, + "loss": 0.8284, + "step": 16520 + }, + { + "epoch": 0.88, + "learning_rate": 4.0034485970330394e-05, + "loss": 0.7905, + "step": 16525 + }, + { + "epoch": 0.88, + "learning_rate": 4.002889025506997e-05, + "loss": 0.8063, + "step": 16530 + }, + { + "epoch": 0.88, + "learning_rate": 4.002329336054128e-05, + "loss": 0.7231, + "step": 16535 + }, + { + "epoch": 0.88, + "learning_rate": 4.001769528718351e-05, + "loss": 0.8226, + "step": 16540 + }, + { + "epoch": 0.89, + "learning_rate": 4.00120960354359e-05, + "loss": 0.7639, + "step": 16545 + }, + { + "epoch": 0.89, + "learning_rate": 4.0006495605737815e-05, + "loss": 0.8261, + "step": 16550 + }, + { + "epoch": 0.89, + "learning_rate": 4.000089399852871e-05, + "loss": 0.864, + "step": 16555 + }, + { + "epoch": 0.89, + "learning_rate": 3.999529121424812e-05, + "loss": 0.8983, + "step": 16560 + }, + { + "epoch": 0.89, + "learning_rate": 3.9989687253335674e-05, + "loss": 0.7784, + "step": 16565 + }, + { + "epoch": 0.89, + "learning_rate": 3.99840821162311e-05, + "loss": 0.8629, + "step": 16570 + }, + { + "epoch": 0.89, + "learning_rate": 3.9978475803374215e-05, + "loss": 0.7185, + "step": 16575 + }, + { + "epoch": 0.89, + "learning_rate": 3.9972868315204924e-05, + "loss": 0.7348, + "step": 16580 + }, + { + "epoch": 0.89, + "learning_rate": 3.996725965216323e-05, + "loss": 0.7805, + "step": 16585 + }, + { + "epoch": 0.89, + "learning_rate": 3.996164981468923e-05, + "loss": 0.8034, + "step": 16590 + }, + { + "epoch": 0.89, + "learning_rate": 3.9956038803223115e-05, + "loss": 0.7638, + "step": 16595 + }, + { + "epoch": 0.89, + "learning_rate": 3.995042661820515e-05, + "loss": 0.7141, + "step": 16600 + }, + { + "epoch": 0.89, + "learning_rate": 3.9944813260075706e-05, + "loss": 0.8057, + "step": 16605 + }, + { + "epoch": 0.89, + "learning_rate": 3.993919872927525e-05, + "loss": 0.6932, + "step": 16610 + }, + { + "epoch": 0.89, + "learning_rate": 3.9933583026244333e-05, + "loss": 0.7585, + "step": 16615 + }, + { + "epoch": 0.89, + "learning_rate": 3.992796615142362e-05, + "loss": 0.7144, + "step": 16620 + }, + { + "epoch": 0.89, + "learning_rate": 3.992234810525381e-05, + "loss": 0.8344, + "step": 16625 + }, + { + "epoch": 0.89, + "learning_rate": 3.991672888817578e-05, + "loss": 0.7852, + "step": 16630 + }, + { + "epoch": 0.89, + "learning_rate": 3.991110850063041e-05, + "loss": 0.6898, + "step": 16635 + }, + { + "epoch": 0.89, + "learning_rate": 3.9905486943058736e-05, + "loss": 0.7883, + "step": 16640 + }, + { + "epoch": 0.89, + "learning_rate": 3.989986421590185e-05, + "loss": 0.8072, + "step": 16645 + }, + { + "epoch": 0.89, + "learning_rate": 3.9894240319600965e-05, + "loss": 0.7579, + "step": 16650 + }, + { + "epoch": 0.89, + "learning_rate": 3.988861525459736e-05, + "loss": 0.6968, + "step": 16655 + }, + { + "epoch": 0.89, + "learning_rate": 3.988298902133242e-05, + "loss": 0.8827, + "step": 16660 + }, + { + "epoch": 0.89, + "learning_rate": 3.987736162024762e-05, + "loss": 0.7285, + "step": 16665 + }, + { + "epoch": 0.89, + "learning_rate": 3.987173305178451e-05, + "loss": 0.8232, + "step": 16670 + }, + { + "epoch": 0.89, + "learning_rate": 3.986610331638476e-05, + "loss": 0.7227, + "step": 16675 + }, + { + "epoch": 0.89, + "learning_rate": 3.98604724144901e-05, + "loss": 0.8194, + "step": 16680 + }, + { + "epoch": 0.89, + "learning_rate": 3.9854840346542395e-05, + "loss": 0.7903, + "step": 16685 + }, + { + "epoch": 0.89, + "learning_rate": 3.9849207112983553e-05, + "loss": 0.7442, + "step": 16690 + }, + { + "epoch": 0.89, + "learning_rate": 3.98435727142556e-05, + "loss": 0.8516, + "step": 16695 + }, + { + "epoch": 0.89, + "learning_rate": 3.983793715080066e-05, + "loss": 0.6535, + "step": 16700 + }, + { + "epoch": 0.89, + "learning_rate": 3.983230042306093e-05, + "loss": 0.661, + "step": 16705 + }, + { + "epoch": 0.89, + "learning_rate": 3.9826662531478696e-05, + "loss": 0.6785, + "step": 16710 + }, + { + "epoch": 0.89, + "learning_rate": 3.982102347649636e-05, + "loss": 0.7567, + "step": 16715 + }, + { + "epoch": 0.89, + "learning_rate": 3.9815383258556385e-05, + "loss": 0.6886, + "step": 16720 + }, + { + "epoch": 0.89, + "learning_rate": 3.980974187810136e-05, + "loss": 0.805, + "step": 16725 + }, + { + "epoch": 0.89, + "learning_rate": 3.980409933557393e-05, + "loss": 0.8739, + "step": 16730 + }, + { + "epoch": 0.9, + "learning_rate": 3.9798455631416854e-05, + "loss": 0.737, + "step": 16735 + }, + { + "epoch": 0.9, + "learning_rate": 3.979281076607297e-05, + "loss": 0.8015, + "step": 16740 + }, + { + "epoch": 0.9, + "learning_rate": 3.978716473998523e-05, + "loss": 0.7946, + "step": 16745 + }, + { + "epoch": 0.9, + "learning_rate": 3.978151755359663e-05, + "loss": 0.7053, + "step": 16750 + }, + { + "epoch": 0.9, + "learning_rate": 3.977586920735031e-05, + "loss": 0.8706, + "step": 16755 + }, + { + "epoch": 0.9, + "learning_rate": 3.977021970168947e-05, + "loss": 0.8629, + "step": 16760 + }, + { + "epoch": 0.9, + "learning_rate": 3.976456903705741e-05, + "loss": 0.7652, + "step": 16765 + }, + { + "epoch": 0.9, + "learning_rate": 3.9758917213897506e-05, + "loss": 0.8781, + "step": 16770 + }, + { + "epoch": 0.9, + "learning_rate": 3.975326423265325e-05, + "loss": 0.6837, + "step": 16775 + }, + { + "epoch": 0.9, + "learning_rate": 3.974761009376822e-05, + "loss": 0.7895, + "step": 16780 + }, + { + "epoch": 0.9, + "learning_rate": 3.974195479768607e-05, + "loss": 0.7482, + "step": 16785 + }, + { + "epoch": 0.9, + "learning_rate": 3.9736298344850554e-05, + "loss": 0.7473, + "step": 16790 + }, + { + "epoch": 0.9, + "learning_rate": 3.973064073570551e-05, + "loss": 0.8753, + "step": 16795 + }, + { + "epoch": 0.9, + "learning_rate": 3.9724981970694883e-05, + "loss": 0.8102, + "step": 16800 + }, + { + "epoch": 0.9, + "learning_rate": 3.971932205026268e-05, + "loss": 0.8776, + "step": 16805 + }, + { + "epoch": 0.9, + "learning_rate": 3.971366097485304e-05, + "loss": 0.8675, + "step": 16810 + }, + { + "epoch": 0.9, + "learning_rate": 3.970799874491014e-05, + "loss": 0.8738, + "step": 16815 + }, + { + "epoch": 0.9, + "learning_rate": 3.9702335360878316e-05, + "loss": 0.8247, + "step": 16820 + }, + { + "epoch": 0.9, + "learning_rate": 3.969667082320193e-05, + "loss": 0.7889, + "step": 16825 + }, + { + "epoch": 0.9, + "learning_rate": 3.9691005132325456e-05, + "loss": 0.7191, + "step": 16830 + }, + { + "epoch": 0.9, + "learning_rate": 3.9685338288693475e-05, + "loss": 0.9174, + "step": 16835 + }, + { + "epoch": 0.9, + "learning_rate": 3.967967029275063e-05, + "loss": 0.732, + "step": 16840 + }, + { + "epoch": 0.9, + "learning_rate": 3.96740011449417e-05, + "loss": 0.6945, + "step": 16845 + }, + { + "epoch": 0.9, + "learning_rate": 3.966833084571149e-05, + "loss": 0.7886, + "step": 16850 + }, + { + "epoch": 0.9, + "learning_rate": 3.966265939550495e-05, + "loss": 0.7362, + "step": 16855 + }, + { + "epoch": 0.9, + "learning_rate": 3.965698679476709e-05, + "loss": 0.8332, + "step": 16860 + }, + { + "epoch": 0.9, + "learning_rate": 3.965131304394304e-05, + "loss": 0.7917, + "step": 16865 + }, + { + "epoch": 0.9, + "learning_rate": 3.964563814347798e-05, + "loss": 0.7757, + "step": 16870 + }, + { + "epoch": 0.9, + "learning_rate": 3.963996209381721e-05, + "loss": 0.7305, + "step": 16875 + }, + { + "epoch": 0.9, + "learning_rate": 3.96342848954061e-05, + "loss": 0.6734, + "step": 16880 + }, + { + "epoch": 0.9, + "learning_rate": 3.962860654869014e-05, + "loss": 0.7232, + "step": 16885 + }, + { + "epoch": 0.9, + "learning_rate": 3.9622927054114885e-05, + "loss": 0.8664, + "step": 16890 + }, + { + "epoch": 0.9, + "learning_rate": 3.961724641212598e-05, + "loss": 0.7922, + "step": 16895 + }, + { + "epoch": 0.9, + "learning_rate": 3.9611564623169166e-05, + "loss": 0.7069, + "step": 16900 + }, + { + "epoch": 0.9, + "learning_rate": 3.960588168769029e-05, + "loss": 0.8122, + "step": 16905 + }, + { + "epoch": 0.9, + "learning_rate": 3.960019760613525e-05, + "loss": 0.6678, + "step": 16910 + }, + { + "epoch": 0.9, + "learning_rate": 3.959451237895008e-05, + "loss": 0.7489, + "step": 16915 + }, + { + "epoch": 0.91, + "learning_rate": 3.9588826006580864e-05, + "loss": 0.7602, + "step": 16920 + }, + { + "epoch": 0.91, + "learning_rate": 3.9583138489473806e-05, + "loss": 0.7805, + "step": 16925 + }, + { + "epoch": 0.91, + "learning_rate": 3.9577449828075177e-05, + "loss": 0.6593, + "step": 16930 + }, + { + "epoch": 0.91, + "learning_rate": 3.957176002283136e-05, + "loss": 0.774, + "step": 16935 + }, + { + "epoch": 0.91, + "learning_rate": 3.95660690741888e-05, + "loss": 0.8005, + "step": 16940 + }, + { + "epoch": 0.91, + "learning_rate": 3.9560376982594054e-05, + "loss": 0.6867, + "step": 16945 + }, + { + "epoch": 0.91, + "learning_rate": 3.955468374849377e-05, + "loss": 0.9174, + "step": 16950 + }, + { + "epoch": 0.91, + "learning_rate": 3.9548989372334664e-05, + "loss": 0.7258, + "step": 16955 + }, + { + "epoch": 0.91, + "learning_rate": 3.954329385456357e-05, + "loss": 0.7968, + "step": 16960 + }, + { + "epoch": 0.91, + "learning_rate": 3.953759719562738e-05, + "loss": 0.7154, + "step": 16965 + }, + { + "epoch": 0.91, + "learning_rate": 3.953189939597311e-05, + "loss": 0.9012, + "step": 16970 + }, + { + "epoch": 0.91, + "learning_rate": 3.9526200456047825e-05, + "loss": 0.8471, + "step": 16975 + }, + { + "epoch": 0.91, + "learning_rate": 3.952050037629873e-05, + "loss": 0.7506, + "step": 16980 + }, + { + "epoch": 0.91, + "learning_rate": 3.951479915717307e-05, + "loss": 0.7195, + "step": 16985 + }, + { + "epoch": 0.91, + "learning_rate": 3.950909679911822e-05, + "loss": 0.8235, + "step": 16990 + }, + { + "epoch": 0.91, + "learning_rate": 3.95033933025816e-05, + "loss": 0.7116, + "step": 16995 + }, + { + "epoch": 0.91, + "learning_rate": 3.9497688668010765e-05, + "loss": 0.7941, + "step": 17000 + }, + { + "epoch": 0.91, + "learning_rate": 3.9491982895853336e-05, + "loss": 0.8068, + "step": 17005 + }, + { + "epoch": 0.91, + "learning_rate": 3.948627598655702e-05, + "loss": 0.7387, + "step": 17010 + }, + { + "epoch": 0.91, + "learning_rate": 3.948056794056963e-05, + "loss": 0.6297, + "step": 17015 + }, + { + "epoch": 0.91, + "learning_rate": 3.947485875833905e-05, + "loss": 0.7021, + "step": 17020 + }, + { + "epoch": 0.91, + "learning_rate": 3.946914844031326e-05, + "loss": 0.6078, + "step": 17025 + }, + { + "epoch": 0.91, + "learning_rate": 3.946343698694034e-05, + "loss": 0.8473, + "step": 17030 + }, + { + "epoch": 0.91, + "learning_rate": 3.945772439866843e-05, + "loss": 0.7613, + "step": 17035 + }, + { + "epoch": 0.91, + "learning_rate": 3.945201067594579e-05, + "loss": 0.8674, + "step": 17040 + }, + { + "epoch": 0.91, + "learning_rate": 3.944629581922077e-05, + "loss": 0.8318, + "step": 17045 + }, + { + "epoch": 0.91, + "learning_rate": 3.944057982894178e-05, + "loss": 0.8046, + "step": 17050 + }, + { + "epoch": 0.91, + "learning_rate": 3.943486270555734e-05, + "loss": 0.6783, + "step": 17055 + }, + { + "epoch": 0.91, + "learning_rate": 3.942914444951604e-05, + "loss": 0.745, + "step": 17060 + }, + { + "epoch": 0.91, + "learning_rate": 3.9423425061266606e-05, + "loss": 0.7864, + "step": 17065 + }, + { + "epoch": 0.91, + "learning_rate": 3.9417704541257785e-05, + "loss": 0.693, + "step": 17070 + }, + { + "epoch": 0.91, + "learning_rate": 3.941198288993847e-05, + "loss": 0.7757, + "step": 17075 + }, + { + "epoch": 0.91, + "learning_rate": 3.940626010775761e-05, + "loss": 0.7796, + "step": 17080 + }, + { + "epoch": 0.91, + "learning_rate": 3.940053619516426e-05, + "loss": 0.5972, + "step": 17085 + }, + { + "epoch": 0.91, + "learning_rate": 3.939481115260755e-05, + "loss": 0.7691, + "step": 17090 + }, + { + "epoch": 0.91, + "learning_rate": 3.9389084980536705e-05, + "loss": 0.8229, + "step": 17095 + }, + { + "epoch": 0.91, + "learning_rate": 3.938335767940105e-05, + "loss": 0.6784, + "step": 17100 + }, + { + "epoch": 0.92, + "learning_rate": 3.9377629249649985e-05, + "loss": 0.6522, + "step": 17105 + }, + { + "epoch": 0.92, + "learning_rate": 3.9371899691732986e-05, + "loss": 0.8549, + "step": 17110 + }, + { + "epoch": 0.92, + "learning_rate": 3.936616900609964e-05, + "loss": 0.7314, + "step": 17115 + }, + { + "epoch": 0.92, + "learning_rate": 3.936043719319963e-05, + "loss": 0.6969, + "step": 17120 + }, + { + "epoch": 0.92, + "learning_rate": 3.9354704253482696e-05, + "loss": 0.6828, + "step": 17125 + }, + { + "epoch": 0.92, + "learning_rate": 3.9348970187398684e-05, + "loss": 0.9116, + "step": 17130 + }, + { + "epoch": 0.92, + "learning_rate": 3.934323499539755e-05, + "loss": 0.791, + "step": 17135 + }, + { + "epoch": 0.92, + "learning_rate": 3.9337498677929286e-05, + "loss": 0.742, + "step": 17140 + }, + { + "epoch": 0.92, + "learning_rate": 3.933176123544401e-05, + "loss": 0.8269, + "step": 17145 + }, + { + "epoch": 0.92, + "learning_rate": 3.932602266839193e-05, + "loss": 0.671, + "step": 17150 + }, + { + "epoch": 0.92, + "learning_rate": 3.9320282977223335e-05, + "loss": 0.8394, + "step": 17155 + }, + { + "epoch": 0.92, + "learning_rate": 3.931454216238858e-05, + "loss": 0.7843, + "step": 17160 + }, + { + "epoch": 0.92, + "learning_rate": 3.930880022433815e-05, + "loss": 0.7456, + "step": 17165 + }, + { + "epoch": 0.92, + "learning_rate": 3.9303057163522586e-05, + "loss": 0.7608, + "step": 17170 + }, + { + "epoch": 0.92, + "learning_rate": 3.929731298039252e-05, + "loss": 0.7318, + "step": 17175 + }, + { + "epoch": 0.92, + "learning_rate": 3.92915676753987e-05, + "loss": 0.7529, + "step": 17180 + }, + { + "epoch": 0.92, + "learning_rate": 3.9285821248991915e-05, + "loss": 0.6915, + "step": 17185 + }, + { + "epoch": 0.92, + "learning_rate": 3.92800737016231e-05, + "loss": 0.6232, + "step": 17190 + }, + { + "epoch": 0.92, + "learning_rate": 3.927432503374322e-05, + "loss": 0.7671, + "step": 17195 + }, + { + "epoch": 0.92, + "learning_rate": 3.926857524580336e-05, + "loss": 0.7864, + "step": 17200 + }, + { + "epoch": 0.92, + "learning_rate": 3.92628243382547e-05, + "loss": 0.7925, + "step": 17205 + }, + { + "epoch": 0.92, + "learning_rate": 3.925707231154848e-05, + "loss": 0.8217, + "step": 17210 + }, + { + "epoch": 0.92, + "learning_rate": 3.9251319166136046e-05, + "loss": 0.705, + "step": 17215 + }, + { + "epoch": 0.92, + "learning_rate": 3.924556490246884e-05, + "loss": 0.776, + "step": 17220 + }, + { + "epoch": 0.92, + "learning_rate": 3.923980952099836e-05, + "loss": 0.798, + "step": 17225 + }, + { + "epoch": 0.92, + "learning_rate": 3.923405302217623e-05, + "loss": 0.6944, + "step": 17230 + }, + { + "epoch": 0.92, + "learning_rate": 3.922829540645414e-05, + "loss": 0.6899, + "step": 17235 + }, + { + "epoch": 0.92, + "learning_rate": 3.922253667428387e-05, + "loss": 0.7824, + "step": 17240 + }, + { + "epoch": 0.92, + "learning_rate": 3.9216776826117286e-05, + "loss": 0.875, + "step": 17245 + }, + { + "epoch": 0.92, + "learning_rate": 3.921101586240634e-05, + "loss": 0.9044, + "step": 17250 + }, + { + "epoch": 0.92, + "learning_rate": 3.920525378360309e-05, + "loss": 0.7314, + "step": 17255 + }, + { + "epoch": 0.92, + "learning_rate": 3.919949059015966e-05, + "loss": 0.7633, + "step": 17260 + }, + { + "epoch": 0.92, + "learning_rate": 3.919372628252827e-05, + "loss": 0.8406, + "step": 17265 + }, + { + "epoch": 0.92, + "learning_rate": 3.918796086116122e-05, + "loss": 0.8061, + "step": 17270 + }, + { + "epoch": 0.92, + "learning_rate": 3.9182194326510916e-05, + "loss": 0.7262, + "step": 17275 + }, + { + "epoch": 0.92, + "learning_rate": 3.9176426679029835e-05, + "loss": 0.7744, + "step": 17280 + }, + { + "epoch": 0.92, + "learning_rate": 3.917065791917053e-05, + "loss": 0.8709, + "step": 17285 + }, + { + "epoch": 0.92, + "learning_rate": 3.916488804738568e-05, + "loss": 0.9183, + "step": 17290 + }, + { + "epoch": 0.93, + "learning_rate": 3.9159117064128027e-05, + "loss": 0.8939, + "step": 17295 + }, + { + "epoch": 0.93, + "learning_rate": 3.9153344969850384e-05, + "loss": 0.6784, + "step": 17300 + }, + { + "epoch": 0.93, + "learning_rate": 3.914757176500567e-05, + "loss": 0.9736, + "step": 17305 + }, + { + "epoch": 0.93, + "learning_rate": 3.914179745004691e-05, + "loss": 0.8132, + "step": 17310 + }, + { + "epoch": 0.93, + "learning_rate": 3.913602202542718e-05, + "loss": 0.6945, + "step": 17315 + }, + { + "epoch": 0.93, + "learning_rate": 3.913024549159966e-05, + "loss": 0.8214, + "step": 17320 + }, + { + "epoch": 0.93, + "learning_rate": 3.912446784901762e-05, + "loss": 0.6764, + "step": 17325 + }, + { + "epoch": 0.93, + "learning_rate": 3.911868909813441e-05, + "loss": 0.6392, + "step": 17330 + }, + { + "epoch": 0.93, + "learning_rate": 3.9112909239403475e-05, + "loss": 0.8495, + "step": 17335 + }, + { + "epoch": 0.93, + "learning_rate": 3.910712827327833e-05, + "loss": 0.5846, + "step": 17340 + }, + { + "epoch": 0.93, + "learning_rate": 3.91013462002126e-05, + "loss": 0.6663, + "step": 17345 + }, + { + "epoch": 0.93, + "learning_rate": 3.909556302065998e-05, + "loss": 0.8245, + "step": 17350 + }, + { + "epoch": 0.93, + "learning_rate": 3.908977873507425e-05, + "loss": 0.6627, + "step": 17355 + }, + { + "epoch": 0.93, + "learning_rate": 3.908399334390931e-05, + "loss": 0.7817, + "step": 17360 + }, + { + "epoch": 0.93, + "learning_rate": 3.907820684761909e-05, + "loss": 0.7928, + "step": 17365 + }, + { + "epoch": 0.93, + "learning_rate": 3.9072419246657655e-05, + "loss": 0.932, + "step": 17370 + }, + { + "epoch": 0.93, + "learning_rate": 3.906663054147913e-05, + "loss": 0.7943, + "step": 17375 + }, + { + "epoch": 0.93, + "learning_rate": 3.906084073253775e-05, + "loss": 0.8446, + "step": 17380 + }, + { + "epoch": 0.93, + "learning_rate": 3.905504982028781e-05, + "loss": 0.7757, + "step": 17385 + }, + { + "epoch": 0.93, + "learning_rate": 3.9049257805183714e-05, + "loss": 0.7323, + "step": 17390 + }, + { + "epoch": 0.93, + "learning_rate": 3.904346468767993e-05, + "loss": 0.6872, + "step": 17395 + }, + { + "epoch": 0.93, + "learning_rate": 3.9037670468231037e-05, + "loss": 0.5631, + "step": 17400 + }, + { + "epoch": 0.93, + "learning_rate": 3.9031875147291684e-05, + "loss": 0.7096, + "step": 17405 + }, + { + "epoch": 0.93, + "learning_rate": 3.9026078725316605e-05, + "loss": 0.8894, + "step": 17410 + }, + { + "epoch": 0.93, + "learning_rate": 3.902028120276063e-05, + "loss": 0.72, + "step": 17415 + }, + { + "epoch": 0.93, + "learning_rate": 3.9014482580078684e-05, + "loss": 0.8377, + "step": 17420 + }, + { + "epoch": 0.93, + "learning_rate": 3.900868285772575e-05, + "loss": 0.7026, + "step": 17425 + }, + { + "epoch": 0.93, + "learning_rate": 3.900288203615692e-05, + "loss": 0.7654, + "step": 17430 + }, + { + "epoch": 0.93, + "learning_rate": 3.899708011582737e-05, + "loss": 0.8032, + "step": 17435 + }, + { + "epoch": 0.93, + "learning_rate": 3.899127709719234e-05, + "loss": 0.7333, + "step": 17440 + }, + { + "epoch": 0.93, + "learning_rate": 3.898547298070719e-05, + "loss": 0.8479, + "step": 17445 + }, + { + "epoch": 0.93, + "learning_rate": 3.8979667766827355e-05, + "loss": 0.6394, + "step": 17450 + }, + { + "epoch": 0.93, + "learning_rate": 3.897386145600834e-05, + "loss": 0.8133, + "step": 17455 + }, + { + "epoch": 0.93, + "learning_rate": 3.896805404870575e-05, + "loss": 0.8142, + "step": 17460 + }, + { + "epoch": 0.93, + "learning_rate": 3.896224554537527e-05, + "loss": 0.6381, + "step": 17465 + }, + { + "epoch": 0.93, + "learning_rate": 3.895643594647268e-05, + "loss": 0.5959, + "step": 17470 + }, + { + "epoch": 0.93, + "learning_rate": 3.895062525245384e-05, + "loss": 0.742, + "step": 17475 + }, + { + "epoch": 0.94, + "learning_rate": 3.8944813463774705e-05, + "loss": 0.6821, + "step": 17480 + }, + { + "epoch": 0.94, + "learning_rate": 3.893900058089128e-05, + "loss": 0.808, + "step": 17485 + }, + { + "epoch": 0.94, + "learning_rate": 3.8933186604259715e-05, + "loss": 0.6793, + "step": 17490 + }, + { + "epoch": 0.94, + "learning_rate": 3.8927371534336196e-05, + "loss": 0.7619, + "step": 17495 + }, + { + "epoch": 0.94, + "learning_rate": 3.8921555371577e-05, + "loss": 0.6483, + "step": 17500 + }, + { + "epoch": 0.94, + "learning_rate": 3.891573811643854e-05, + "loss": 0.899, + "step": 17505 + }, + { + "epoch": 0.94, + "learning_rate": 3.890991976937725e-05, + "loss": 0.7156, + "step": 17510 + }, + { + "epoch": 0.94, + "learning_rate": 3.890410033084968e-05, + "loss": 0.7189, + "step": 17515 + }, + { + "epoch": 0.94, + "learning_rate": 3.889827980131246e-05, + "loss": 0.9347, + "step": 17520 + }, + { + "epoch": 0.94, + "learning_rate": 3.889245818122232e-05, + "loss": 0.6657, + "step": 17525 + }, + { + "epoch": 0.94, + "learning_rate": 3.8886635471036056e-05, + "loss": 0.7726, + "step": 17530 + }, + { + "epoch": 0.94, + "learning_rate": 3.8880811671210556e-05, + "loss": 0.7855, + "step": 17535 + }, + { + "epoch": 0.94, + "learning_rate": 3.887498678220279e-05, + "loss": 0.7795, + "step": 17540 + }, + { + "epoch": 0.94, + "learning_rate": 3.8869160804469834e-05, + "loss": 0.7495, + "step": 17545 + }, + { + "epoch": 0.94, + "learning_rate": 3.8863333738468824e-05, + "loss": 0.8478, + "step": 17550 + }, + { + "epoch": 0.94, + "learning_rate": 3.885750558465698e-05, + "loss": 0.8729, + "step": 17555 + }, + { + "epoch": 0.94, + "learning_rate": 3.885167634349165e-05, + "loss": 0.9165, + "step": 17560 + }, + { + "epoch": 0.94, + "learning_rate": 3.8845846015430195e-05, + "loss": 0.7312, + "step": 17565 + }, + { + "epoch": 0.94, + "learning_rate": 3.884001460093013e-05, + "loss": 0.7507, + "step": 17570 + }, + { + "epoch": 0.94, + "learning_rate": 3.883418210044901e-05, + "loss": 0.7741, + "step": 17575 + }, + { + "epoch": 0.94, + "learning_rate": 3.882834851444451e-05, + "loss": 0.7969, + "step": 17580 + }, + { + "epoch": 0.94, + "learning_rate": 3.8822513843374367e-05, + "loss": 0.6521, + "step": 17585 + }, + { + "epoch": 0.94, + "learning_rate": 3.881667808769641e-05, + "loss": 0.7637, + "step": 17590 + }, + { + "epoch": 0.94, + "learning_rate": 3.8810841247868535e-05, + "loss": 0.7294, + "step": 17595 + }, + { + "epoch": 0.94, + "learning_rate": 3.8805003324348757e-05, + "loss": 0.7753, + "step": 17600 + }, + { + "epoch": 0.94, + "learning_rate": 3.879916431759516e-05, + "loss": 0.8253, + "step": 17605 + }, + { + "epoch": 0.94, + "learning_rate": 3.87933242280659e-05, + "loss": 0.7932, + "step": 17610 + }, + { + "epoch": 0.94, + "learning_rate": 3.878748305621923e-05, + "loss": 0.9105, + "step": 17615 + }, + { + "epoch": 0.94, + "learning_rate": 3.878164080251351e-05, + "loss": 0.7556, + "step": 17620 + }, + { + "epoch": 0.94, + "learning_rate": 3.877579746740714e-05, + "loss": 0.7761, + "step": 17625 + }, + { + "epoch": 0.94, + "learning_rate": 3.876995305135863e-05, + "loss": 0.8138, + "step": 17630 + }, + { + "epoch": 0.94, + "learning_rate": 3.876410755482658e-05, + "loss": 0.7631, + "step": 17635 + }, + { + "epoch": 0.94, + "learning_rate": 3.875826097826966e-05, + "loss": 0.8511, + "step": 17640 + }, + { + "epoch": 0.94, + "learning_rate": 3.875241332214664e-05, + "loss": 0.7067, + "step": 17645 + }, + { + "epoch": 0.94, + "learning_rate": 3.874656458691637e-05, + "loss": 0.8036, + "step": 17650 + }, + { + "epoch": 0.94, + "learning_rate": 3.874071477303775e-05, + "loss": 0.7485, + "step": 17655 + }, + { + "epoch": 0.94, + "learning_rate": 3.8734863880969844e-05, + "loss": 0.941, + "step": 17660 + }, + { + "epoch": 0.95, + "learning_rate": 3.872901191117172e-05, + "loss": 0.7004, + "step": 17665 + }, + { + "epoch": 0.95, + "learning_rate": 3.8723158864102566e-05, + "loss": 0.7929, + "step": 17670 + }, + { + "epoch": 0.95, + "learning_rate": 3.871730474022166e-05, + "loss": 0.8741, + "step": 17675 + }, + { + "epoch": 0.95, + "learning_rate": 3.871144953998835e-05, + "loss": 0.6594, + "step": 17680 + }, + { + "epoch": 0.95, + "learning_rate": 3.8705593263862085e-05, + "loss": 0.7829, + "step": 17685 + }, + { + "epoch": 0.95, + "learning_rate": 3.869973591230237e-05, + "loss": 0.7887, + "step": 17690 + }, + { + "epoch": 0.95, + "learning_rate": 3.869387748576884e-05, + "loss": 0.699, + "step": 17695 + }, + { + "epoch": 0.95, + "learning_rate": 3.868801798472115e-05, + "loss": 0.6287, + "step": 17700 + }, + { + "epoch": 0.95, + "learning_rate": 3.8682157409619105e-05, + "loss": 0.8502, + "step": 17705 + }, + { + "epoch": 0.95, + "learning_rate": 3.8676295760922555e-05, + "loss": 0.7976, + "step": 17710 + }, + { + "epoch": 0.95, + "learning_rate": 3.867043303909145e-05, + "loss": 0.8013, + "step": 17715 + }, + { + "epoch": 0.95, + "learning_rate": 3.86645692445858e-05, + "loss": 0.685, + "step": 17720 + }, + { + "epoch": 0.95, + "learning_rate": 3.865870437786574e-05, + "loss": 0.8897, + "step": 17725 + }, + { + "epoch": 0.95, + "learning_rate": 3.8652838439391464e-05, + "loss": 0.7566, + "step": 17730 + }, + { + "epoch": 0.95, + "learning_rate": 3.864697142962325e-05, + "loss": 0.6678, + "step": 17735 + }, + { + "epoch": 0.95, + "learning_rate": 3.864110334902145e-05, + "loss": 0.7299, + "step": 17740 + }, + { + "epoch": 0.95, + "learning_rate": 3.8635234198046534e-05, + "loss": 0.7878, + "step": 17745 + }, + { + "epoch": 0.95, + "learning_rate": 3.862936397715902e-05, + "loss": 0.7374, + "step": 17750 + }, + { + "epoch": 0.95, + "learning_rate": 3.862349268681954e-05, + "loss": 0.7431, + "step": 17755 + }, + { + "epoch": 0.95, + "learning_rate": 3.861762032748878e-05, + "loss": 0.7168, + "step": 17760 + }, + { + "epoch": 0.95, + "learning_rate": 3.8611746899627534e-05, + "loss": 0.7872, + "step": 17765 + }, + { + "epoch": 0.95, + "learning_rate": 3.860587240369666e-05, + "loss": 0.7851, + "step": 17770 + }, + { + "epoch": 0.95, + "learning_rate": 3.8599996840157126e-05, + "loss": 0.9775, + "step": 17775 + }, + { + "epoch": 0.95, + "learning_rate": 3.859412020946995e-05, + "loss": 0.7903, + "step": 17780 + }, + { + "epoch": 0.95, + "learning_rate": 3.858824251209628e-05, + "loss": 0.7083, + "step": 17785 + }, + { + "epoch": 0.95, + "learning_rate": 3.85823637484973e-05, + "loss": 0.8283, + "step": 17790 + }, + { + "epoch": 0.95, + "learning_rate": 3.8576483919134295e-05, + "loss": 0.7515, + "step": 17795 + }, + { + "epoch": 0.95, + "learning_rate": 3.857060302446864e-05, + "loss": 0.6571, + "step": 17800 + }, + { + "epoch": 0.95, + "learning_rate": 3.8564721064961794e-05, + "loss": 0.7269, + "step": 17805 + }, + { + "epoch": 0.95, + "learning_rate": 3.8558838041075296e-05, + "loss": 0.6345, + "step": 17810 + }, + { + "epoch": 0.95, + "learning_rate": 3.855295395327077e-05, + "loss": 0.7981, + "step": 17815 + }, + { + "epoch": 0.95, + "learning_rate": 3.85470688020099e-05, + "loss": 0.8889, + "step": 17820 + }, + { + "epoch": 0.95, + "learning_rate": 3.8541182587754495e-05, + "loss": 0.8363, + "step": 17825 + }, + { + "epoch": 0.95, + "learning_rate": 3.853529531096643e-05, + "loss": 0.7905, + "step": 17830 + }, + { + "epoch": 0.95, + "learning_rate": 3.852940697210765e-05, + "loss": 0.7161, + "step": 17835 + }, + { + "epoch": 0.95, + "learning_rate": 3.85235175716402e-05, + "loss": 0.6421, + "step": 17840 + }, + { + "epoch": 0.95, + "learning_rate": 3.85176271100262e-05, + "loss": 0.6763, + "step": 17845 + }, + { + "epoch": 0.95, + "learning_rate": 3.8511735587727846e-05, + "loss": 0.7565, + "step": 17850 + }, + { + "epoch": 0.96, + "learning_rate": 3.850584300520744e-05, + "loss": 0.8027, + "step": 17855 + }, + { + "epoch": 0.96, + "learning_rate": 3.8499949362927354e-05, + "loss": 0.8461, + "step": 17860 + }, + { + "epoch": 0.96, + "learning_rate": 3.849405466135003e-05, + "loss": 0.8408, + "step": 17865 + }, + { + "epoch": 0.96, + "learning_rate": 3.8488158900938016e-05, + "loss": 0.7571, + "step": 17870 + }, + { + "epoch": 0.96, + "learning_rate": 3.8482262082153934e-05, + "loss": 0.8898, + "step": 17875 + }, + { + "epoch": 0.96, + "learning_rate": 3.847636420546049e-05, + "loss": 0.8263, + "step": 17880 + }, + { + "epoch": 0.96, + "learning_rate": 3.8470465271320457e-05, + "loss": 0.7813, + "step": 17885 + }, + { + "epoch": 0.96, + "learning_rate": 3.846456528019672e-05, + "loss": 0.7865, + "step": 17890 + }, + { + "epoch": 0.96, + "learning_rate": 3.845866423255223e-05, + "loss": 0.7483, + "step": 17895 + }, + { + "epoch": 0.96, + "learning_rate": 3.8452762128850017e-05, + "loss": 0.6165, + "step": 17900 + }, + { + "epoch": 0.96, + "learning_rate": 3.84468589695532e-05, + "loss": 0.5934, + "step": 17905 + }, + { + "epoch": 0.96, + "learning_rate": 3.8440954755124994e-05, + "loss": 0.8084, + "step": 17910 + }, + { + "epoch": 0.96, + "learning_rate": 3.8435049486028665e-05, + "loss": 0.7367, + "step": 17915 + }, + { + "epoch": 0.96, + "learning_rate": 3.842914316272759e-05, + "loss": 0.7221, + "step": 17920 + }, + { + "epoch": 0.96, + "learning_rate": 3.842323578568522e-05, + "loss": 0.5363, + "step": 17925 + }, + { + "epoch": 0.96, + "learning_rate": 3.8417327355365086e-05, + "loss": 0.7407, + "step": 17930 + }, + { + "epoch": 0.96, + "learning_rate": 3.8411417872230805e-05, + "loss": 0.7741, + "step": 17935 + }, + { + "epoch": 0.96, + "learning_rate": 3.840550733674607e-05, + "loss": 0.8834, + "step": 17940 + }, + { + "epoch": 0.96, + "learning_rate": 3.839959574937466e-05, + "loss": 0.673, + "step": 17945 + }, + { + "epoch": 0.96, + "learning_rate": 3.839368311058045e-05, + "loss": 0.811, + "step": 17950 + }, + { + "epoch": 0.96, + "learning_rate": 3.8387769420827375e-05, + "loss": 0.7714, + "step": 17955 + }, + { + "epoch": 0.96, + "learning_rate": 3.838185468057947e-05, + "loss": 0.7051, + "step": 17960 + }, + { + "epoch": 0.96, + "learning_rate": 3.837593889030083e-05, + "loss": 0.7434, + "step": 17965 + }, + { + "epoch": 0.96, + "learning_rate": 3.837002205045568e-05, + "loss": 0.7249, + "step": 17970 + }, + { + "epoch": 0.96, + "learning_rate": 3.8364104161508256e-05, + "loss": 0.7987, + "step": 17975 + }, + { + "epoch": 0.96, + "learning_rate": 3.835818522392294e-05, + "loss": 0.7849, + "step": 17980 + }, + { + "epoch": 0.96, + "learning_rate": 3.835226523816417e-05, + "loss": 0.7924, + "step": 17985 + }, + { + "epoch": 0.96, + "learning_rate": 3.8346344204696465e-05, + "loss": 0.7745, + "step": 17990 + }, + { + "epoch": 0.96, + "learning_rate": 3.8340422123984424e-05, + "loss": 0.706, + "step": 17995 + }, + { + "epoch": 0.96, + "learning_rate": 3.833449899649274e-05, + "loss": 0.7746, + "step": 18000 + }, + { + "epoch": 0.96, + "learning_rate": 3.832857482268618e-05, + "loss": 0.7411, + "step": 18005 + }, + { + "epoch": 0.96, + "learning_rate": 3.8322649603029595e-05, + "loss": 0.602, + "step": 18010 + }, + { + "epoch": 0.96, + "learning_rate": 3.8316723337987906e-05, + "loss": 0.8662, + "step": 18015 + }, + { + "epoch": 0.96, + "learning_rate": 3.831079602802616e-05, + "loss": 0.7628, + "step": 18020 + }, + { + "epoch": 0.96, + "learning_rate": 3.830486767360941e-05, + "loss": 0.6366, + "step": 18025 + }, + { + "epoch": 0.96, + "learning_rate": 3.829893827520287e-05, + "loss": 0.7286, + "step": 18030 + }, + { + "epoch": 0.96, + "learning_rate": 3.8293007833271786e-05, + "loss": 0.8289, + "step": 18035 + }, + { + "epoch": 0.97, + "learning_rate": 3.8287076348281505e-05, + "loss": 0.7653, + "step": 18040 + }, + { + "epoch": 0.97, + "learning_rate": 3.8281143820697446e-05, + "loss": 0.8328, + "step": 18045 + }, + { + "epoch": 0.97, + "learning_rate": 3.827521025098512e-05, + "loss": 0.7595, + "step": 18050 + }, + { + "epoch": 0.97, + "learning_rate": 3.826927563961012e-05, + "loss": 0.6525, + "step": 18055 + }, + { + "epoch": 0.97, + "learning_rate": 3.826333998703809e-05, + "loss": 0.7187, + "step": 18060 + }, + { + "epoch": 0.97, + "learning_rate": 3.8257403293734816e-05, + "loss": 0.7774, + "step": 18065 + }, + { + "epoch": 0.97, + "learning_rate": 3.825146556016611e-05, + "loss": 0.7758, + "step": 18070 + }, + { + "epoch": 0.97, + "learning_rate": 3.82455267867979e-05, + "loss": 0.7693, + "step": 18075 + }, + { + "epoch": 0.97, + "learning_rate": 3.8239586974096164e-05, + "loss": 0.9131, + "step": 18080 + }, + { + "epoch": 0.97, + "learning_rate": 3.823364612252699e-05, + "loss": 0.7526, + "step": 18085 + }, + { + "epoch": 0.97, + "learning_rate": 3.8227704232556546e-05, + "loss": 0.7251, + "step": 18090 + }, + { + "epoch": 0.97, + "learning_rate": 3.822176130465105e-05, + "loss": 0.8181, + "step": 18095 + }, + { + "epoch": 0.97, + "learning_rate": 3.821581733927685e-05, + "loss": 0.7405, + "step": 18100 + }, + { + "epoch": 0.97, + "learning_rate": 3.8209872336900346e-05, + "loss": 0.7548, + "step": 18105 + }, + { + "epoch": 0.97, + "learning_rate": 3.8203926297988e-05, + "loss": 0.7436, + "step": 18110 + }, + { + "epoch": 0.97, + "learning_rate": 3.81979792230064e-05, + "loss": 0.8826, + "step": 18115 + }, + { + "epoch": 0.97, + "learning_rate": 3.8192031112422185e-05, + "loss": 0.7785, + "step": 18120 + }, + { + "epoch": 0.97, + "learning_rate": 3.8186081966702085e-05, + "loss": 0.8562, + "step": 18125 + }, + { + "epoch": 0.97, + "learning_rate": 3.818013178631291e-05, + "loss": 0.7341, + "step": 18130 + }, + { + "epoch": 0.97, + "learning_rate": 3.817418057172156e-05, + "loss": 0.8296, + "step": 18135 + }, + { + "epoch": 0.97, + "learning_rate": 3.816822832339499e-05, + "loss": 0.8363, + "step": 18140 + }, + { + "epoch": 0.97, + "learning_rate": 3.816227504180028e-05, + "loss": 0.7421, + "step": 18145 + }, + { + "epoch": 0.97, + "learning_rate": 3.815632072740454e-05, + "loss": 0.99, + "step": 18150 + }, + { + "epoch": 0.97, + "learning_rate": 3.815036538067499e-05, + "loss": 0.7078, + "step": 18155 + }, + { + "epoch": 0.97, + "learning_rate": 3.814440900207894e-05, + "loss": 0.7589, + "step": 18160 + }, + { + "epoch": 0.97, + "learning_rate": 3.8138451592083755e-05, + "loss": 0.8, + "step": 18165 + }, + { + "epoch": 0.97, + "learning_rate": 3.8132493151156894e-05, + "loss": 0.7336, + "step": 18170 + }, + { + "epoch": 0.97, + "learning_rate": 3.8126533679765894e-05, + "loss": 0.6928, + "step": 18175 + }, + { + "epoch": 0.97, + "learning_rate": 3.81205731783784e-05, + "loss": 0.8581, + "step": 18180 + }, + { + "epoch": 0.97, + "learning_rate": 3.8114611647462084e-05, + "loss": 0.6296, + "step": 18185 + }, + { + "epoch": 0.97, + "learning_rate": 3.810864908748474e-05, + "loss": 0.611, + "step": 18190 + }, + { + "epoch": 0.97, + "learning_rate": 3.8102685498914226e-05, + "loss": 0.8231, + "step": 18195 + }, + { + "epoch": 0.97, + "learning_rate": 3.80967208822185e-05, + "loss": 0.8017, + "step": 18200 + }, + { + "epoch": 0.97, + "learning_rate": 3.8090755237865556e-05, + "loss": 0.7067, + "step": 18205 + }, + { + "epoch": 0.97, + "learning_rate": 3.8084788566323524e-05, + "loss": 0.7478, + "step": 18210 + }, + { + "epoch": 0.97, + "learning_rate": 3.807882086806059e-05, + "loss": 0.784, + "step": 18215 + }, + { + "epoch": 0.97, + "learning_rate": 3.8072852143545015e-05, + "loss": 0.8031, + "step": 18220 + }, + { + "epoch": 0.97, + "learning_rate": 3.8066882393245126e-05, + "loss": 0.8042, + "step": 18225 + }, + { + "epoch": 0.98, + "learning_rate": 3.806091161762938e-05, + "loss": 0.7842, + "step": 18230 + }, + { + "epoch": 0.98, + "learning_rate": 3.8054939817166275e-05, + "loss": 0.6238, + "step": 18235 + }, + { + "epoch": 0.98, + "learning_rate": 3.804896699232439e-05, + "loss": 0.7415, + "step": 18240 + }, + { + "epoch": 0.98, + "learning_rate": 3.804299314357239e-05, + "loss": 0.7814, + "step": 18245 + }, + { + "epoch": 0.98, + "learning_rate": 3.803701827137905e-05, + "loss": 0.6005, + "step": 18250 + }, + { + "epoch": 0.98, + "learning_rate": 3.803104237621318e-05, + "loss": 0.6153, + "step": 18255 + }, + { + "epoch": 0.98, + "learning_rate": 3.802506545854367e-05, + "loss": 0.6509, + "step": 18260 + }, + { + "epoch": 0.98, + "learning_rate": 3.801908751883955e-05, + "loss": 0.7546, + "step": 18265 + }, + { + "epoch": 0.98, + "learning_rate": 3.801310855756986e-05, + "loss": 0.6083, + "step": 18270 + }, + { + "epoch": 0.98, + "learning_rate": 3.8007128575203765e-05, + "loss": 0.7606, + "step": 18275 + }, + { + "epoch": 0.98, + "learning_rate": 3.8001147572210485e-05, + "loss": 0.7806, + "step": 18280 + }, + { + "epoch": 0.98, + "learning_rate": 3.799516554905932e-05, + "loss": 0.8587, + "step": 18285 + }, + { + "epoch": 0.98, + "learning_rate": 3.798918250621969e-05, + "loss": 0.6479, + "step": 18290 + }, + { + "epoch": 0.98, + "learning_rate": 3.7983198444161045e-05, + "loss": 0.6683, + "step": 18295 + }, + { + "epoch": 0.98, + "learning_rate": 3.797721336335294e-05, + "loss": 0.7001, + "step": 18300 + }, + { + "epoch": 0.98, + "learning_rate": 3.7971227264264997e-05, + "loss": 0.8493, + "step": 18305 + }, + { + "epoch": 0.98, + "learning_rate": 3.796524014736694e-05, + "loss": 0.7354, + "step": 18310 + }, + { + "epoch": 0.98, + "learning_rate": 3.7959252013128546e-05, + "loss": 0.6859, + "step": 18315 + }, + { + "epoch": 0.98, + "learning_rate": 3.795326286201968e-05, + "loss": 0.7001, + "step": 18320 + }, + { + "epoch": 0.98, + "learning_rate": 3.794727269451032e-05, + "loss": 0.7015, + "step": 18325 + }, + { + "epoch": 0.98, + "learning_rate": 3.794128151107046e-05, + "loss": 0.7658, + "step": 18330 + }, + { + "epoch": 0.98, + "learning_rate": 3.793528931217023e-05, + "loss": 0.8859, + "step": 18335 + }, + { + "epoch": 0.98, + "learning_rate": 3.792929609827981e-05, + "loss": 0.7092, + "step": 18340 + }, + { + "epoch": 0.98, + "learning_rate": 3.792330186986947e-05, + "loss": 0.7721, + "step": 18345 + }, + { + "epoch": 0.98, + "learning_rate": 3.7917306627409556e-05, + "loss": 0.7075, + "step": 18350 + }, + { + "epoch": 0.98, + "learning_rate": 3.79113103713705e-05, + "loss": 0.8593, + "step": 18355 + }, + { + "epoch": 0.98, + "learning_rate": 3.7905313102222816e-05, + "loss": 0.7468, + "step": 18360 + }, + { + "epoch": 0.98, + "learning_rate": 3.789931482043707e-05, + "loss": 0.6434, + "step": 18365 + }, + { + "epoch": 0.98, + "learning_rate": 3.789331552648393e-05, + "loss": 0.7863, + "step": 18370 + }, + { + "epoch": 0.98, + "learning_rate": 3.788731522083416e-05, + "loss": 0.7467, + "step": 18375 + }, + { + "epoch": 0.98, + "learning_rate": 3.788131390395858e-05, + "loss": 0.6507, + "step": 18380 + }, + { + "epoch": 0.98, + "learning_rate": 3.787531157632808e-05, + "loss": 1.0363, + "step": 18385 + }, + { + "epoch": 0.98, + "learning_rate": 3.786930823841365e-05, + "loss": 0.9571, + "step": 18390 + }, + { + "epoch": 0.98, + "learning_rate": 3.786330389068635e-05, + "loss": 0.7973, + "step": 18395 + }, + { + "epoch": 0.98, + "learning_rate": 3.785729853361733e-05, + "loss": 0.9392, + "step": 18400 + }, + { + "epoch": 0.98, + "learning_rate": 3.785249352155273e-05, + "loss": 0.7905, + "step": 18405 + }, + { + "epoch": 0.98, + "learning_rate": 3.7846486348856136e-05, + "loss": 0.665, + "step": 18410 + }, + { + "epoch": 0.99, + "learning_rate": 3.784047816813743e-05, + "loss": 0.7184, + "step": 18415 + }, + { + "epoch": 0.99, + "learning_rate": 3.783446897986806e-05, + "loss": 0.6136, + "step": 18420 + }, + { + "epoch": 0.99, + "learning_rate": 3.782845878451955e-05, + "loss": 0.7457, + "step": 18425 + }, + { + "epoch": 0.99, + "learning_rate": 3.7822447582563505e-05, + "loss": 0.7722, + "step": 18430 + }, + { + "epoch": 0.99, + "learning_rate": 3.781643537447158e-05, + "loss": 0.5666, + "step": 18435 + }, + { + "epoch": 0.99, + "learning_rate": 3.781042216071555e-05, + "loss": 0.7349, + "step": 18440 + }, + { + "epoch": 0.99, + "learning_rate": 3.780440794176727e-05, + "loss": 0.6803, + "step": 18445 + }, + { + "epoch": 0.99, + "learning_rate": 3.779839271809863e-05, + "loss": 0.8977, + "step": 18450 + }, + { + "epoch": 0.99, + "learning_rate": 3.779237649018163e-05, + "loss": 0.712, + "step": 18455 + }, + { + "epoch": 0.99, + "learning_rate": 3.7786359258488356e-05, + "loss": 0.8406, + "step": 18460 + }, + { + "epoch": 0.99, + "learning_rate": 3.778034102349095e-05, + "loss": 0.6713, + "step": 18465 + }, + { + "epoch": 0.99, + "learning_rate": 3.777432178566164e-05, + "loss": 0.6923, + "step": 18470 + }, + { + "epoch": 0.99, + "learning_rate": 3.776830154547275e-05, + "loss": 0.8148, + "step": 18475 + }, + { + "epoch": 0.99, + "learning_rate": 3.776228030339666e-05, + "loss": 0.663, + "step": 18480 + }, + { + "epoch": 0.99, + "learning_rate": 3.775625805990583e-05, + "loss": 0.7632, + "step": 18485 + }, + { + "epoch": 0.99, + "learning_rate": 3.775023481547282e-05, + "loss": 0.6002, + "step": 18490 + }, + { + "epoch": 0.99, + "learning_rate": 3.774421057057025e-05, + "loss": 0.8781, + "step": 18495 + }, + { + "epoch": 0.99, + "learning_rate": 3.7738185325670815e-05, + "loss": 0.585, + "step": 18500 + }, + { + "epoch": 0.99, + "learning_rate": 3.773215908124731e-05, + "loss": 0.793, + "step": 18505 + }, + { + "epoch": 0.99, + "learning_rate": 3.772613183777258e-05, + "loss": 0.7079, + "step": 18510 + }, + { + "epoch": 0.99, + "learning_rate": 3.7720103595719586e-05, + "loss": 0.8202, + "step": 18515 + }, + { + "epoch": 0.99, + "learning_rate": 3.771407435556131e-05, + "loss": 0.6826, + "step": 18520 + }, + { + "epoch": 0.99, + "learning_rate": 3.770804411777088e-05, + "loss": 0.7195, + "step": 18525 + }, + { + "epoch": 0.99, + "learning_rate": 3.770201288282145e-05, + "loss": 0.7974, + "step": 18530 + }, + { + "epoch": 0.99, + "learning_rate": 3.769598065118628e-05, + "loss": 0.6449, + "step": 18535 + }, + { + "epoch": 0.99, + "learning_rate": 3.7689947423338686e-05, + "loss": 0.7252, + "step": 18540 + }, + { + "epoch": 0.99, + "learning_rate": 3.768391319975211e-05, + "loss": 0.8655, + "step": 18545 + }, + { + "epoch": 0.99, + "learning_rate": 3.7677877980900004e-05, + "loss": 0.7962, + "step": 18550 + }, + { + "epoch": 0.99, + "learning_rate": 3.7671841767255935e-05, + "loss": 0.9109, + "step": 18555 + }, + { + "epoch": 0.99, + "learning_rate": 3.766580455929355e-05, + "loss": 0.802, + "step": 18560 + }, + { + "epoch": 0.99, + "learning_rate": 3.765976635748658e-05, + "loss": 0.7244, + "step": 18565 + }, + { + "epoch": 0.99, + "learning_rate": 3.765372716230881e-05, + "loss": 0.8154, + "step": 18570 + }, + { + "epoch": 0.99, + "learning_rate": 3.7647686974234125e-05, + "loss": 0.9034, + "step": 18575 + }, + { + "epoch": 0.99, + "learning_rate": 3.764164579373647e-05, + "loss": 0.7674, + "step": 18580 + }, + { + "epoch": 0.99, + "learning_rate": 3.763560362128989e-05, + "loss": 0.79, + "step": 18585 + }, + { + "epoch": 0.99, + "learning_rate": 3.762956045736848e-05, + "loss": 0.7604, + "step": 18590 + }, + { + "epoch": 0.99, + "learning_rate": 3.762351630244643e-05, + "loss": 0.5801, + "step": 18595 + }, + { + "epoch": 1.0, + "learning_rate": 3.7617471156998015e-05, + "loss": 0.7421, + "step": 18600 + }, + { + "epoch": 1.0, + "learning_rate": 3.761142502149758e-05, + "loss": 0.6293, + "step": 18605 + }, + { + "epoch": 1.0, + "learning_rate": 3.760537789641952e-05, + "loss": 0.761, + "step": 18610 + }, + { + "epoch": 1.0, + "learning_rate": 3.7599329782238366e-05, + "loss": 0.7081, + "step": 18615 + }, + { + "epoch": 1.0, + "learning_rate": 3.759328067942867e-05, + "loss": 0.6849, + "step": 18620 + }, + { + "epoch": 1.0, + "learning_rate": 3.7587230588465095e-05, + "loss": 0.7673, + "step": 18625 + }, + { + "epoch": 1.0, + "learning_rate": 3.7581179509822376e-05, + "loss": 0.6882, + "step": 18630 + }, + { + "epoch": 1.0, + "learning_rate": 3.7575127443975314e-05, + "loss": 0.6605, + "step": 18635 + }, + { + "epoch": 1.0, + "learning_rate": 3.75690743913988e-05, + "loss": 0.6872, + "step": 18640 + }, + { + "epoch": 1.0, + "learning_rate": 3.756302035256779e-05, + "loss": 0.9828, + "step": 18645 + }, + { + "epoch": 1.0, + "learning_rate": 3.755696532795733e-05, + "loss": 0.7231, + "step": 18650 + }, + { + "epoch": 1.0, + "learning_rate": 3.7550909318042547e-05, + "loss": 0.8458, + "step": 18655 + }, + { + "epoch": 1.0, + "learning_rate": 3.754485232329862e-05, + "loss": 0.6933, + "step": 18660 + }, + { + "epoch": 1.0, + "learning_rate": 3.7538794344200834e-05, + "loss": 0.7562, + "step": 18665 + }, + { + "epoch": 1.0, + "learning_rate": 3.753273538122453e-05, + "loss": 0.7589, + "step": 18670 + }, + { + "epoch": 1.0, + "learning_rate": 3.7526675434845146e-05, + "loss": 0.8413, + "step": 18675 + }, + { + "epoch": 1.0, + "learning_rate": 3.752061450553816e-05, + "loss": 0.6979, + "step": 18680 + }, + { + "epoch": 1.0, + "learning_rate": 3.7514552593779193e-05, + "loss": 0.7118, + "step": 18685 + }, + { + "epoch": 1.0, + "learning_rate": 3.750848970004388e-05, + "loss": 0.6427, + "step": 18690 + }, + { + "epoch": 1.0, + "learning_rate": 3.7502425824807965e-05, + "loss": 0.7137, + "step": 18695 + }, + { + "epoch": 1.0, + "learning_rate": 3.749636096854724e-05, + "loss": 0.7334, + "step": 18700 + }, + { + "epoch": 1.0, + "learning_rate": 3.749029513173763e-05, + "loss": 0.6886, + "step": 18705 + }, + { + "epoch": 1.0, + "learning_rate": 3.748422831485507e-05, + "loss": 0.7472, + "step": 18710 + }, + { + "epoch": 1.0, + "learning_rate": 3.747816051837561e-05, + "loss": 0.7642, + "step": 18715 + }, + { + "epoch": 1.0, + "learning_rate": 3.7472091742775394e-05, + "loss": 0.7597, + "step": 18720 + }, + { + "epoch": 1.0, + "learning_rate": 3.746602198853059e-05, + "loss": 0.7698, + "step": 18725 + }, + { + "epoch": 1.0, + "learning_rate": 3.7459951256117484e-05, + "loss": 0.8039, + "step": 18730 + }, + { + "epoch": 1.0, + "learning_rate": 3.745387954601243e-05, + "loss": 0.7821, + "step": 18735 + }, + { + "epoch": 1.0, + "learning_rate": 3.7447806858691835e-05, + "loss": 0.5738, + "step": 18740 + }, + { + "epoch": 1.0, + "learning_rate": 3.744173319463224e-05, + "loss": 0.8147, + "step": 18745 + }, + { + "epoch": 1.0, + "learning_rate": 3.7435658554310195e-05, + "loss": 0.6916, + "step": 18750 + }, + { + "epoch": 1.0, + "learning_rate": 3.7429582938202365e-05, + "loss": 0.633, + "step": 18755 + }, + { + "epoch": 1.0, + "learning_rate": 3.742350634678549e-05, + "loss": 0.6888, + "step": 18760 + }, + { + "epoch": 1.0, + "learning_rate": 3.741742878053637e-05, + "loss": 0.6898, + "step": 18765 + }, + { + "epoch": 1.0, + "learning_rate": 3.741135023993191e-05, + "loss": 0.7246, + "step": 18770 + }, + { + "epoch": 1.0, + "learning_rate": 3.7405270725449056e-05, + "loss": 0.7306, + "step": 18775 + }, + { + "epoch": 1.0, + "learning_rate": 3.739919023756485e-05, + "loss": 0.8678, + "step": 18780 + }, + { + "epoch": 1.0, + "learning_rate": 3.7393108776756414e-05, + "loss": 0.8372, + "step": 18785 + }, + { + "epoch": 1.01, + "learning_rate": 3.7387026343500934e-05, + "loss": 0.6038, + "step": 18790 + }, + { + "epoch": 1.01, + "learning_rate": 3.738094293827569e-05, + "loss": 0.7692, + "step": 18795 + }, + { + "epoch": 1.01, + "learning_rate": 3.737485856155802e-05, + "loss": 0.7382, + "step": 18800 + }, + { + "epoch": 1.01, + "learning_rate": 3.736877321382534e-05, + "loss": 0.8407, + "step": 18805 + }, + { + "epoch": 1.01, + "learning_rate": 3.736268689555516e-05, + "loss": 0.7166, + "step": 18810 + }, + { + "epoch": 1.01, + "learning_rate": 3.7356599607225036e-05, + "loss": 0.7035, + "step": 18815 + }, + { + "epoch": 1.01, + "learning_rate": 3.735051134931263e-05, + "loss": 0.693, + "step": 18820 + }, + { + "epoch": 1.01, + "learning_rate": 3.7344422122295675e-05, + "loss": 0.7892, + "step": 18825 + }, + { + "epoch": 1.01, + "learning_rate": 3.7338331926651957e-05, + "loss": 0.6281, + "step": 18830 + }, + { + "epoch": 1.01, + "learning_rate": 3.733224076285936e-05, + "loss": 0.6696, + "step": 18835 + }, + { + "epoch": 1.01, + "learning_rate": 3.732614863139585e-05, + "loss": 0.748, + "step": 18840 + }, + { + "epoch": 1.01, + "learning_rate": 3.7320055532739426e-05, + "loss": 0.7578, + "step": 18845 + }, + { + "epoch": 1.01, + "learning_rate": 3.7313961467368225e-05, + "loss": 0.7734, + "step": 18850 + }, + { + "epoch": 1.01, + "learning_rate": 3.730786643576042e-05, + "loss": 0.7208, + "step": 18855 + }, + { + "epoch": 1.01, + "learning_rate": 3.730177043839426e-05, + "loss": 0.624, + "step": 18860 + }, + { + "epoch": 1.01, + "learning_rate": 3.7295673475748085e-05, + "loss": 0.631, + "step": 18865 + }, + { + "epoch": 1.01, + "learning_rate": 3.72895755483003e-05, + "loss": 0.7402, + "step": 18870 + }, + { + "epoch": 1.01, + "learning_rate": 3.7283476656529394e-05, + "loss": 0.7745, + "step": 18875 + }, + { + "epoch": 1.01, + "learning_rate": 3.727737680091392e-05, + "loss": 0.7307, + "step": 18880 + }, + { + "epoch": 1.01, + "learning_rate": 3.727127598193252e-05, + "loss": 0.6934, + "step": 18885 + }, + { + "epoch": 1.01, + "learning_rate": 3.726517420006391e-05, + "loss": 0.706, + "step": 18890 + }, + { + "epoch": 1.01, + "learning_rate": 3.725907145578687e-05, + "loss": 0.5992, + "step": 18895 + }, + { + "epoch": 1.01, + "learning_rate": 3.725296774958026e-05, + "loss": 0.7091, + "step": 18900 + }, + { + "epoch": 1.01, + "learning_rate": 3.7246863081923024e-05, + "loss": 0.6441, + "step": 18905 + }, + { + "epoch": 1.01, + "learning_rate": 3.7240757453294174e-05, + "loss": 0.8, + "step": 18910 + }, + { + "epoch": 1.01, + "learning_rate": 3.7234650864172795e-05, + "loss": 0.6456, + "step": 18915 + }, + { + "epoch": 1.01, + "learning_rate": 3.722854331503806e-05, + "loss": 0.7247, + "step": 18920 + }, + { + "epoch": 1.01, + "learning_rate": 3.72224348063692e-05, + "loss": 0.6949, + "step": 18925 + }, + { + "epoch": 1.01, + "learning_rate": 3.721632533864553e-05, + "loss": 0.6897, + "step": 18930 + }, + { + "epoch": 1.01, + "learning_rate": 3.721021491234644e-05, + "loss": 0.6947, + "step": 18935 + }, + { + "epoch": 1.01, + "learning_rate": 3.720410352795141e-05, + "loss": 0.9082, + "step": 18940 + }, + { + "epoch": 1.01, + "learning_rate": 3.719799118593996e-05, + "loss": 0.7506, + "step": 18945 + }, + { + "epoch": 1.01, + "learning_rate": 3.719187788679172e-05, + "loss": 0.6098, + "step": 18950 + }, + { + "epoch": 1.01, + "learning_rate": 3.718576363098637e-05, + "loss": 0.6518, + "step": 18955 + }, + { + "epoch": 1.01, + "learning_rate": 3.717964841900369e-05, + "loss": 0.683, + "step": 18960 + }, + { + "epoch": 1.01, + "learning_rate": 3.71735322513235e-05, + "loss": 0.5155, + "step": 18965 + }, + { + "epoch": 1.01, + "learning_rate": 3.716741512842573e-05, + "loss": 0.7194, + "step": 18970 + }, + { + "epoch": 1.02, + "learning_rate": 3.716129705079037e-05, + "loss": 0.8037, + "step": 18975 + }, + { + "epoch": 1.02, + "learning_rate": 3.715517801889749e-05, + "loss": 0.7316, + "step": 18980 + }, + { + "epoch": 1.02, + "learning_rate": 3.714905803322722e-05, + "loss": 0.5391, + "step": 18985 + }, + { + "epoch": 1.02, + "learning_rate": 3.714293709425978e-05, + "loss": 0.7811, + "step": 18990 + }, + { + "epoch": 1.02, + "learning_rate": 3.713681520247546e-05, + "loss": 0.7145, + "step": 18995 + }, + { + "epoch": 1.02, + "learning_rate": 3.713069235835463e-05, + "loss": 0.7411, + "step": 19000 + }, + { + "epoch": 1.02, + "learning_rate": 3.712456856237772e-05, + "loss": 0.7058, + "step": 19005 + }, + { + "epoch": 1.02, + "learning_rate": 3.711844381502525e-05, + "loss": 0.7548, + "step": 19010 + }, + { + "epoch": 1.02, + "learning_rate": 3.711231811677781e-05, + "loss": 0.8202, + "step": 19015 + }, + { + "epoch": 1.02, + "learning_rate": 3.710619146811606e-05, + "loss": 0.7619, + "step": 19020 + }, + { + "epoch": 1.02, + "learning_rate": 3.710006386952074e-05, + "loss": 0.748, + "step": 19025 + }, + { + "epoch": 1.02, + "learning_rate": 3.709393532147267e-05, + "loss": 0.7685, + "step": 19030 + }, + { + "epoch": 1.02, + "learning_rate": 3.7087805824452724e-05, + "loss": 0.6781, + "step": 19035 + }, + { + "epoch": 1.02, + "learning_rate": 3.708167537894187e-05, + "loss": 0.6863, + "step": 19040 + }, + { + "epoch": 1.02, + "learning_rate": 3.707554398542115e-05, + "loss": 0.7509, + "step": 19045 + }, + { + "epoch": 1.02, + "learning_rate": 3.7069411644371666e-05, + "loss": 0.786, + "step": 19050 + }, + { + "epoch": 1.02, + "learning_rate": 3.70632783562746e-05, + "loss": 0.6377, + "step": 19055 + }, + { + "epoch": 1.02, + "learning_rate": 3.705714412161123e-05, + "loss": 0.7119, + "step": 19060 + }, + { + "epoch": 1.02, + "learning_rate": 3.705100894086286e-05, + "loss": 0.5874, + "step": 19065 + }, + { + "epoch": 1.02, + "learning_rate": 3.704487281451093e-05, + "loss": 0.772, + "step": 19070 + }, + { + "epoch": 1.02, + "learning_rate": 3.70387357430369e-05, + "loss": 0.7138, + "step": 19075 + }, + { + "epoch": 1.02, + "learning_rate": 3.703259772692233e-05, + "loss": 0.7787, + "step": 19080 + }, + { + "epoch": 1.02, + "learning_rate": 3.702645876664886e-05, + "loss": 0.8605, + "step": 19085 + }, + { + "epoch": 1.02, + "learning_rate": 3.702031886269818e-05, + "loss": 0.6587, + "step": 19090 + }, + { + "epoch": 1.02, + "learning_rate": 3.701417801555208e-05, + "loss": 0.816, + "step": 19095 + }, + { + "epoch": 1.02, + "learning_rate": 3.70080362256924e-05, + "loss": 0.8239, + "step": 19100 + }, + { + "epoch": 1.02, + "learning_rate": 3.700189349360109e-05, + "loss": 0.7396, + "step": 19105 + }, + { + "epoch": 1.02, + "learning_rate": 3.6995749819760116e-05, + "loss": 0.7019, + "step": 19110 + }, + { + "epoch": 1.02, + "learning_rate": 3.698960520465158e-05, + "loss": 0.6598, + "step": 19115 + }, + { + "epoch": 1.02, + "learning_rate": 3.6983459648757615e-05, + "loss": 0.8095, + "step": 19120 + }, + { + "epoch": 1.02, + "learning_rate": 3.697731315256046e-05, + "loss": 0.7334, + "step": 19125 + }, + { + "epoch": 1.02, + "learning_rate": 3.6971165716542386e-05, + "loss": 0.669, + "step": 19130 + }, + { + "epoch": 1.02, + "learning_rate": 3.696501734118577e-05, + "loss": 0.6063, + "step": 19135 + }, + { + "epoch": 1.02, + "learning_rate": 3.6958868026973075e-05, + "loss": 0.7111, + "step": 19140 + }, + { + "epoch": 1.02, + "learning_rate": 3.69527177743868e-05, + "loss": 0.606, + "step": 19145 + }, + { + "epoch": 1.02, + "learning_rate": 3.694656658390954e-05, + "loss": 0.7113, + "step": 19150 + }, + { + "epoch": 1.02, + "learning_rate": 3.694041445602394e-05, + "loss": 0.6619, + "step": 19155 + }, + { + "epoch": 1.02, + "learning_rate": 3.6934261391212775e-05, + "loss": 0.7227, + "step": 19160 + }, + { + "epoch": 1.03, + "learning_rate": 3.6928107389958826e-05, + "loss": 0.7112, + "step": 19165 + }, + { + "epoch": 1.03, + "learning_rate": 3.692195245274499e-05, + "loss": 0.721, + "step": 19170 + }, + { + "epoch": 1.03, + "learning_rate": 3.6915796580054226e-05, + "loss": 0.6568, + "step": 19175 + }, + { + "epoch": 1.03, + "learning_rate": 3.690963977236956e-05, + "loss": 0.5884, + "step": 19180 + }, + { + "epoch": 1.03, + "learning_rate": 3.690348203017409e-05, + "loss": 0.668, + "step": 19185 + }, + { + "epoch": 1.03, + "learning_rate": 3.6897323353951006e-05, + "loss": 0.7781, + "step": 19190 + }, + { + "epoch": 1.03, + "learning_rate": 3.6891163744183566e-05, + "loss": 0.7888, + "step": 19195 + }, + { + "epoch": 1.03, + "learning_rate": 3.688500320135507e-05, + "loss": 0.585, + "step": 19200 + }, + { + "epoch": 1.03, + "learning_rate": 3.6878841725948946e-05, + "loss": 0.7379, + "step": 19205 + }, + { + "epoch": 1.03, + "learning_rate": 3.6872679318448636e-05, + "loss": 0.6995, + "step": 19210 + }, + { + "epoch": 1.03, + "learning_rate": 3.686651597933771e-05, + "loss": 0.7771, + "step": 19215 + }, + { + "epoch": 1.03, + "learning_rate": 3.6860351709099754e-05, + "loss": 0.6342, + "step": 19220 + }, + { + "epoch": 1.03, + "learning_rate": 3.685418650821849e-05, + "loss": 0.7564, + "step": 19225 + }, + { + "epoch": 1.03, + "learning_rate": 3.6848020377177664e-05, + "loss": 0.5561, + "step": 19230 + }, + { + "epoch": 1.03, + "learning_rate": 3.684185331646112e-05, + "loss": 0.7206, + "step": 19235 + }, + { + "epoch": 1.03, + "learning_rate": 3.683568532655276e-05, + "loss": 0.7525, + "step": 19240 + }, + { + "epoch": 1.03, + "learning_rate": 3.682951640793657e-05, + "loss": 0.7429, + "step": 19245 + }, + { + "epoch": 1.03, + "learning_rate": 3.682334656109661e-05, + "loss": 0.8073, + "step": 19250 + }, + { + "epoch": 1.03, + "learning_rate": 3.6817175786517e-05, + "loss": 0.6816, + "step": 19255 + }, + { + "epoch": 1.03, + "learning_rate": 3.6811004084681944e-05, + "loss": 0.6836, + "step": 19260 + }, + { + "epoch": 1.03, + "learning_rate": 3.6804831456075704e-05, + "loss": 0.7229, + "step": 19265 + }, + { + "epoch": 1.03, + "learning_rate": 3.679865790118265e-05, + "loss": 0.611, + "step": 19270 + }, + { + "epoch": 1.03, + "learning_rate": 3.679248342048719e-05, + "loss": 0.7383, + "step": 19275 + }, + { + "epoch": 1.03, + "learning_rate": 3.67863080144738e-05, + "loss": 0.6316, + "step": 19280 + }, + { + "epoch": 1.03, + "learning_rate": 3.6780131683627066e-05, + "loss": 0.6876, + "step": 19285 + }, + { + "epoch": 1.03, + "learning_rate": 3.677395442843162e-05, + "loss": 0.7016, + "step": 19290 + }, + { + "epoch": 1.03, + "learning_rate": 3.676777624937216e-05, + "loss": 0.6569, + "step": 19295 + }, + { + "epoch": 1.03, + "learning_rate": 3.676159714693347e-05, + "loss": 0.6753, + "step": 19300 + }, + { + "epoch": 1.03, + "learning_rate": 3.675541712160042e-05, + "loss": 0.7971, + "step": 19305 + }, + { + "epoch": 1.03, + "learning_rate": 3.674923617385792e-05, + "loss": 0.6965, + "step": 19310 + }, + { + "epoch": 1.03, + "learning_rate": 3.674305430419097e-05, + "loss": 0.7058, + "step": 19315 + }, + { + "epoch": 1.03, + "learning_rate": 3.6736871513084656e-05, + "loss": 0.715, + "step": 19320 + }, + { + "epoch": 1.03, + "learning_rate": 3.673068780102411e-05, + "loss": 0.6447, + "step": 19325 + }, + { + "epoch": 1.03, + "learning_rate": 3.672450316849454e-05, + "loss": 0.6567, + "step": 19330 + }, + { + "epoch": 1.03, + "learning_rate": 3.6718317615981255e-05, + "loss": 0.7121, + "step": 19335 + }, + { + "epoch": 1.03, + "learning_rate": 3.6712131143969596e-05, + "loss": 0.6963, + "step": 19340 + }, + { + "epoch": 1.03, + "learning_rate": 3.6705943752945e-05, + "loss": 0.8069, + "step": 19345 + }, + { + "epoch": 1.04, + "learning_rate": 3.6699755443392983e-05, + "loss": 0.6849, + "step": 19350 + }, + { + "epoch": 1.04, + "learning_rate": 3.669356621579911e-05, + "loss": 0.7365, + "step": 19355 + }, + { + "epoch": 1.04, + "learning_rate": 3.6687376070649024e-05, + "loss": 0.6641, + "step": 19360 + }, + { + "epoch": 1.04, + "learning_rate": 3.668118500842846e-05, + "loss": 0.6893, + "step": 19365 + }, + { + "epoch": 1.04, + "learning_rate": 3.6674993029623207e-05, + "loss": 0.7242, + "step": 19370 + }, + { + "epoch": 1.04, + "learning_rate": 3.666880013471913e-05, + "loss": 0.678, + "step": 19375 + }, + { + "epoch": 1.04, + "learning_rate": 3.6662606324202145e-05, + "loss": 0.7119, + "step": 19380 + }, + { + "epoch": 1.04, + "learning_rate": 3.665641159855829e-05, + "loss": 0.6684, + "step": 19385 + }, + { + "epoch": 1.04, + "learning_rate": 3.665021595827364e-05, + "loss": 0.6933, + "step": 19390 + }, + { + "epoch": 1.04, + "learning_rate": 3.664401940383433e-05, + "loss": 0.6098, + "step": 19395 + }, + { + "epoch": 1.04, + "learning_rate": 3.663782193572659e-05, + "loss": 0.7141, + "step": 19400 + }, + { + "epoch": 1.04, + "learning_rate": 3.6631623554436725e-05, + "loss": 0.5819, + "step": 19405 + }, + { + "epoch": 1.04, + "learning_rate": 3.6625424260451094e-05, + "loss": 0.8023, + "step": 19410 + }, + { + "epoch": 1.04, + "learning_rate": 3.6619224054256135e-05, + "loss": 0.7077, + "step": 19415 + }, + { + "epoch": 1.04, + "learning_rate": 3.661302293633836e-05, + "loss": 0.8241, + "step": 19420 + }, + { + "epoch": 1.04, + "learning_rate": 3.660682090718435e-05, + "loss": 0.721, + "step": 19425 + }, + { + "epoch": 1.04, + "learning_rate": 3.6600617967280756e-05, + "loss": 0.7919, + "step": 19430 + }, + { + "epoch": 1.04, + "learning_rate": 3.6594414117114314e-05, + "loss": 0.7374, + "step": 19435 + }, + { + "epoch": 1.04, + "learning_rate": 3.65882093571718e-05, + "loss": 0.9226, + "step": 19440 + }, + { + "epoch": 1.04, + "learning_rate": 3.65820036879401e-05, + "loss": 0.6172, + "step": 19445 + }, + { + "epoch": 1.04, + "learning_rate": 3.657579710990614e-05, + "loss": 0.7177, + "step": 19450 + }, + { + "epoch": 1.04, + "learning_rate": 3.656958962355693e-05, + "loss": 0.5644, + "step": 19455 + }, + { + "epoch": 1.04, + "learning_rate": 3.6563381229379576e-05, + "loss": 0.6877, + "step": 19460 + }, + { + "epoch": 1.04, + "learning_rate": 3.655717192786119e-05, + "loss": 0.7084, + "step": 19465 + }, + { + "epoch": 1.04, + "learning_rate": 3.655096171948903e-05, + "loss": 0.7819, + "step": 19470 + }, + { + "epoch": 1.04, + "learning_rate": 3.654475060475037e-05, + "loss": 0.8572, + "step": 19475 + }, + { + "epoch": 1.04, + "learning_rate": 3.653853858413259e-05, + "loss": 0.6891, + "step": 19480 + }, + { + "epoch": 1.04, + "learning_rate": 3.653232565812311e-05, + "loss": 0.6481, + "step": 19485 + }, + { + "epoch": 1.04, + "learning_rate": 3.652611182720946e-05, + "loss": 0.6192, + "step": 19490 + }, + { + "epoch": 1.04, + "learning_rate": 3.651989709187921e-05, + "loss": 0.7298, + "step": 19495 + }, + { + "epoch": 1.04, + "learning_rate": 3.6513681452619994e-05, + "loss": 0.7542, + "step": 19500 + }, + { + "epoch": 1.04, + "learning_rate": 3.650746490991956e-05, + "loss": 0.7275, + "step": 19505 + }, + { + "epoch": 1.04, + "learning_rate": 3.6501247464265674e-05, + "loss": 0.7142, + "step": 19510 + }, + { + "epoch": 1.04, + "learning_rate": 3.649502911614623e-05, + "loss": 0.7425, + "step": 19515 + }, + { + "epoch": 1.04, + "learning_rate": 3.6488809866049135e-05, + "loss": 0.7094, + "step": 19520 + }, + { + "epoch": 1.04, + "learning_rate": 3.64825897144624e-05, + "loss": 0.7883, + "step": 19525 + }, + { + "epoch": 1.04, + "learning_rate": 3.64763686618741e-05, + "loss": 0.6493, + "step": 19530 + }, + { + "epoch": 1.05, + "learning_rate": 3.6470146708772384e-05, + "loss": 0.6927, + "step": 19535 + }, + { + "epoch": 1.05, + "learning_rate": 3.646392385564547e-05, + "loss": 0.5466, + "step": 19540 + }, + { + "epoch": 1.05, + "learning_rate": 3.645770010298164e-05, + "loss": 0.6321, + "step": 19545 + }, + { + "epoch": 1.05, + "learning_rate": 3.645147545126926e-05, + "loss": 0.8171, + "step": 19550 + }, + { + "epoch": 1.05, + "learning_rate": 3.644524990099675e-05, + "loss": 0.5653, + "step": 19555 + }, + { + "epoch": 1.05, + "learning_rate": 3.64390234526526e-05, + "loss": 0.7726, + "step": 19560 + }, + { + "epoch": 1.05, + "learning_rate": 3.6432796106725396e-05, + "loss": 0.6603, + "step": 19565 + }, + { + "epoch": 1.05, + "learning_rate": 3.642656786370378e-05, + "loss": 0.6843, + "step": 19570 + }, + { + "epoch": 1.05, + "learning_rate": 3.642033872407645e-05, + "loss": 0.6938, + "step": 19575 + }, + { + "epoch": 1.05, + "learning_rate": 3.6414108688332186e-05, + "loss": 0.677, + "step": 19580 + }, + { + "epoch": 1.05, + "learning_rate": 3.640787775695985e-05, + "loss": 0.6901, + "step": 19585 + }, + { + "epoch": 1.05, + "learning_rate": 3.6401645930448356e-05, + "loss": 0.671, + "step": 19590 + }, + { + "epoch": 1.05, + "learning_rate": 3.6395413209286686e-05, + "loss": 0.741, + "step": 19595 + }, + { + "epoch": 1.05, + "learning_rate": 3.6389179593963914e-05, + "loss": 0.8225, + "step": 19600 + }, + { + "epoch": 1.05, + "learning_rate": 3.6382945084969165e-05, + "loss": 0.715, + "step": 19605 + }, + { + "epoch": 1.05, + "learning_rate": 3.637670968279165e-05, + "loss": 0.6918, + "step": 19610 + }, + { + "epoch": 1.05, + "learning_rate": 3.637047338792063e-05, + "loss": 0.6316, + "step": 19615 + }, + { + "epoch": 1.05, + "learning_rate": 3.6364236200845456e-05, + "loss": 0.7644, + "step": 19620 + }, + { + "epoch": 1.05, + "learning_rate": 3.635799812205554e-05, + "loss": 0.659, + "step": 19625 + }, + { + "epoch": 1.05, + "learning_rate": 3.6351759152040355e-05, + "loss": 0.6877, + "step": 19630 + }, + { + "epoch": 1.05, + "learning_rate": 3.634551929128945e-05, + "loss": 0.7445, + "step": 19635 + }, + { + "epoch": 1.05, + "learning_rate": 3.633927854029246e-05, + "loss": 0.7579, + "step": 19640 + }, + { + "epoch": 1.05, + "learning_rate": 3.633303689953907e-05, + "loss": 0.8384, + "step": 19645 + }, + { + "epoch": 1.05, + "learning_rate": 3.6326794369519034e-05, + "loss": 0.6814, + "step": 19650 + }, + { + "epoch": 1.05, + "learning_rate": 3.6320550950722197e-05, + "loss": 0.7199, + "step": 19655 + }, + { + "epoch": 1.05, + "learning_rate": 3.631430664363846e-05, + "loss": 0.7483, + "step": 19660 + }, + { + "epoch": 1.05, + "learning_rate": 3.6308061448757776e-05, + "loss": 0.5444, + "step": 19665 + }, + { + "epoch": 1.05, + "learning_rate": 3.630181536657019e-05, + "loss": 0.7758, + "step": 19670 + }, + { + "epoch": 1.05, + "learning_rate": 3.629556839756583e-05, + "loss": 0.7804, + "step": 19675 + }, + { + "epoch": 1.05, + "learning_rate": 3.628932054223486e-05, + "loss": 0.7373, + "step": 19680 + }, + { + "epoch": 1.05, + "learning_rate": 3.6283071801067524e-05, + "loss": 0.6756, + "step": 19685 + }, + { + "epoch": 1.05, + "learning_rate": 3.6276822174554156e-05, + "loss": 0.6399, + "step": 19690 + }, + { + "epoch": 1.05, + "learning_rate": 3.627057166318514e-05, + "loss": 0.7184, + "step": 19695 + }, + { + "epoch": 1.05, + "learning_rate": 3.626432026745092e-05, + "loss": 0.7802, + "step": 19700 + }, + { + "epoch": 1.05, + "learning_rate": 3.6258067987842045e-05, + "loss": 0.8092, + "step": 19705 + }, + { + "epoch": 1.05, + "learning_rate": 3.625181482484908e-05, + "loss": 0.7534, + "step": 19710 + }, + { + "epoch": 1.05, + "learning_rate": 3.624556077896273e-05, + "loss": 0.6718, + "step": 19715 + }, + { + "epoch": 1.05, + "learning_rate": 3.62393058506737e-05, + "loss": 0.5859, + "step": 19720 + }, + { + "epoch": 1.06, + "learning_rate": 3.62330500404728e-05, + "loss": 0.7219, + "step": 19725 + }, + { + "epoch": 1.06, + "learning_rate": 3.622679334885091e-05, + "loss": 0.7059, + "step": 19730 + }, + { + "epoch": 1.06, + "learning_rate": 3.622053577629896e-05, + "loss": 0.6691, + "step": 19735 + }, + { + "epoch": 1.06, + "learning_rate": 3.621427732330797e-05, + "loss": 0.6074, + "step": 19740 + }, + { + "epoch": 1.06, + "learning_rate": 3.6208017990369024e-05, + "loss": 0.7223, + "step": 19745 + }, + { + "epoch": 1.06, + "learning_rate": 3.620175777797328e-05, + "loss": 0.6325, + "step": 19750 + }, + { + "epoch": 1.06, + "learning_rate": 3.6195496686611926e-05, + "loss": 0.7658, + "step": 19755 + }, + { + "epoch": 1.06, + "learning_rate": 3.618923471677626e-05, + "loss": 0.6585, + "step": 19760 + }, + { + "epoch": 1.06, + "learning_rate": 3.618297186895767e-05, + "loss": 0.6261, + "step": 19765 + }, + { + "epoch": 1.06, + "learning_rate": 3.617670814364754e-05, + "loss": 0.7079, + "step": 19770 + }, + { + "epoch": 1.06, + "learning_rate": 3.6170443541337394e-05, + "loss": 0.7239, + "step": 19775 + }, + { + "epoch": 1.06, + "learning_rate": 3.616417806251877e-05, + "loss": 0.6381, + "step": 19780 + }, + { + "epoch": 1.06, + "learning_rate": 3.615791170768332e-05, + "loss": 0.6819, + "step": 19785 + }, + { + "epoch": 1.06, + "learning_rate": 3.615164447732274e-05, + "loss": 0.6381, + "step": 19790 + }, + { + "epoch": 1.06, + "learning_rate": 3.614537637192879e-05, + "loss": 0.7217, + "step": 19795 + }, + { + "epoch": 1.06, + "learning_rate": 3.6139107391993316e-05, + "loss": 0.7405, + "step": 19800 + }, + { + "epoch": 1.06, + "learning_rate": 3.6132837538008225e-05, + "loss": 0.6009, + "step": 19805 + }, + { + "epoch": 1.06, + "learning_rate": 3.6126566810465486e-05, + "loss": 0.7134, + "step": 19810 + }, + { + "epoch": 1.06, + "learning_rate": 3.612029520985715e-05, + "loss": 0.7042, + "step": 19815 + }, + { + "epoch": 1.06, + "learning_rate": 3.6114022736675315e-05, + "loss": 0.7595, + "step": 19820 + }, + { + "epoch": 1.06, + "learning_rate": 3.6107749391412184e-05, + "loss": 0.795, + "step": 19825 + }, + { + "epoch": 1.06, + "learning_rate": 3.610147517455999e-05, + "loss": 0.6147, + "step": 19830 + }, + { + "epoch": 1.06, + "learning_rate": 3.609520008661105e-05, + "loss": 0.7308, + "step": 19835 + }, + { + "epoch": 1.06, + "learning_rate": 3.6088924128057766e-05, + "loss": 0.6944, + "step": 19840 + }, + { + "epoch": 1.06, + "learning_rate": 3.608264729939257e-05, + "loss": 0.5859, + "step": 19845 + }, + { + "epoch": 1.06, + "learning_rate": 3.6076369601108e-05, + "loss": 0.5451, + "step": 19850 + }, + { + "epoch": 1.06, + "learning_rate": 3.607009103369663e-05, + "loss": 0.7287, + "step": 19855 + }, + { + "epoch": 1.06, + "learning_rate": 3.606381159765115e-05, + "loss": 0.7377, + "step": 19860 + }, + { + "epoch": 1.06, + "learning_rate": 3.605753129346425e-05, + "loss": 0.7201, + "step": 19865 + }, + { + "epoch": 1.06, + "learning_rate": 3.605125012162875e-05, + "loss": 0.5411, + "step": 19870 + }, + { + "epoch": 1.06, + "learning_rate": 3.604496808263751e-05, + "loss": 0.6901, + "step": 19875 + }, + { + "epoch": 1.06, + "learning_rate": 3.6038685176983445e-05, + "loss": 0.6943, + "step": 19880 + }, + { + "epoch": 1.06, + "learning_rate": 3.603240140515957e-05, + "loss": 0.7507, + "step": 19885 + }, + { + "epoch": 1.06, + "learning_rate": 3.6026116767658954e-05, + "loss": 0.7565, + "step": 19890 + }, + { + "epoch": 1.06, + "learning_rate": 3.601983126497472e-05, + "loss": 0.6515, + "step": 19895 + }, + { + "epoch": 1.06, + "learning_rate": 3.601354489760008e-05, + "loss": 0.6443, + "step": 19900 + }, + { + "epoch": 1.06, + "learning_rate": 3.600725766602831e-05, + "loss": 0.722, + "step": 19905 + }, + { + "epoch": 1.07, + "learning_rate": 3.600096957075273e-05, + "loss": 0.6648, + "step": 19910 + }, + { + "epoch": 1.07, + "learning_rate": 3.5994680612266756e-05, + "loss": 0.6729, + "step": 19915 + }, + { + "epoch": 1.07, + "learning_rate": 3.598839079106387e-05, + "loss": 0.7067, + "step": 19920 + }, + { + "epoch": 1.07, + "learning_rate": 3.598210010763761e-05, + "loss": 0.5048, + "step": 19925 + }, + { + "epoch": 1.07, + "learning_rate": 3.597580856248157e-05, + "loss": 0.7746, + "step": 19930 + }, + { + "epoch": 1.07, + "learning_rate": 3.596951615608945e-05, + "loss": 0.6867, + "step": 19935 + }, + { + "epoch": 1.07, + "learning_rate": 3.596322288895498e-05, + "loss": 0.8095, + "step": 19940 + }, + { + "epoch": 1.07, + "learning_rate": 3.5956928761571976e-05, + "loss": 0.5721, + "step": 19945 + }, + { + "epoch": 1.07, + "learning_rate": 3.595063377443433e-05, + "loss": 0.539, + "step": 19950 + }, + { + "epoch": 1.07, + "learning_rate": 3.5944337928035964e-05, + "loss": 0.5423, + "step": 19955 + }, + { + "epoch": 1.07, + "learning_rate": 3.593804122287091e-05, + "loss": 0.7047, + "step": 19960 + }, + { + "epoch": 1.07, + "learning_rate": 3.5931743659433253e-05, + "loss": 0.6379, + "step": 19965 + }, + { + "epoch": 1.07, + "learning_rate": 3.592544523821712e-05, + "loss": 0.671, + "step": 19970 + }, + { + "epoch": 1.07, + "learning_rate": 3.5919145959716765e-05, + "loss": 0.6506, + "step": 19975 + }, + { + "epoch": 1.07, + "learning_rate": 3.591284582442644e-05, + "loss": 0.725, + "step": 19980 + }, + { + "epoch": 1.07, + "learning_rate": 3.59065448328405e-05, + "loss": 0.9172, + "step": 19985 + }, + { + "epoch": 1.07, + "learning_rate": 3.590024298545338e-05, + "loss": 0.6289, + "step": 19990 + }, + { + "epoch": 1.07, + "learning_rate": 3.5893940282759555e-05, + "loss": 0.7067, + "step": 19995 + }, + { + "epoch": 1.07, + "learning_rate": 3.5887636725253574e-05, + "loss": 0.6789, + "step": 20000 + } + ], + "logging_steps": 5, + "max_steps": 56076, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 2.3139803180942623e+18, + "trial_name": null, + "trial_params": null +}